gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimple-fold.h"
39 #include "gimplify-me.h"
40 #include "cfgloop.h"
41 #include "tree-vectorizer.h"
42 #include "dumpfile.h"
43 #include "builtins.h"
44 #include "internal-fn.h"
45 #include "case-cfn-macros.h"
46 #include "fold-const-call.h"
47 #include "attribs.h"
48 #include "cgraph.h"
49 #include "omp-simd-clone.h"
50 #include "predict.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "gimple-range.h"
56 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
 57      in the first operand.  Disentangling this is future work; the
 58      IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
61 /* Return true if we have a useful VR_RANGE range for VAR, storing it
62 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
64 static bool
65 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
67 value_range vr;
68 get_range_query (cfun)->range_of_expr (vr, var);
69 if (vr.undefined_p ())
70 vr.set_varying (TREE_TYPE (var));
71 *min_value = wi::to_wide (vr.min ());
72 *max_value = wi::to_wide (vr.max ());
73 value_range_kind vr_type = vr.kind ();
74 wide_int nonzero = get_nonzero_bits (var);
75 signop sgn = TYPE_SIGN (TREE_TYPE (var));
76 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
77 nonzero, sgn) == VR_RANGE)
79 if (dump_enabled_p ())
81 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
82 dump_printf (MSG_NOTE, " has range [");
83 dump_hex (MSG_NOTE, *min_value);
84 dump_printf (MSG_NOTE, ", ");
85 dump_hex (MSG_NOTE, *max_value);
86 dump_printf (MSG_NOTE, "]\n");
88 return true;
90 else
92 if (dump_enabled_p ())
94 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
95 dump_printf (MSG_NOTE, " has no range info\n");
97 return false;
101 /* Report that we've found an instance of pattern PATTERN in
102 statement STMT. */
104 static void
105 vect_pattern_detected (const char *name, gimple *stmt)
107 if (dump_enabled_p ())
108 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
111 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
112 return the pattern statement's stmt_vec_info. Set its vector type to
113 VECTYPE if it doesn't have one already. */
115 static stmt_vec_info
116 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
117 stmt_vec_info orig_stmt_info, tree vectype)
119 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
120 if (pattern_stmt_info == NULL)
121 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
122 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
124 pattern_stmt_info->pattern_stmt_p = true;
125 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
126 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
127 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
128 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
130 gcc_assert (!vectype
131 || (VECTOR_BOOLEAN_TYPE_P (vectype)
132 == vect_use_mask_type_p (orig_stmt_info)));
133 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
134 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
136 return pattern_stmt_info;
139 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
140 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
141 have one already. */
143 static void
144 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
145 stmt_vec_info orig_stmt_info, tree vectype)
147 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
148 STMT_VINFO_RELATED_STMT (orig_stmt_info)
149 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
152 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
153 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
154 be different from the vector type of the final pattern statement.
155 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
156 from which it was derived. */
158 static inline void
159 append_pattern_def_seq (vec_info *vinfo,
160 stmt_vec_info stmt_info, gimple *new_stmt,
161 tree vectype = NULL_TREE,
162 tree scalar_type_for_mask = NULL_TREE)
164 gcc_assert (!scalar_type_for_mask
165 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
166 if (vectype)
168 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
169 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
170 if (scalar_type_for_mask)
171 new_stmt_info->mask_precision
172 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
174 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
175 new_stmt);
178 /* The caller wants to perform new operations on vect_external variable
179 VAR, so that the result of the operations would also be vect_external.
180 Return the edge on which the operations can be performed, if one exists.
181 Return null if the operations should instead be treated as part of
182 the pattern that needs them. */
184 static edge
185 vect_get_external_def_edge (vec_info *vinfo, tree var)
187 edge e = NULL;
188 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
190 e = loop_preheader_edge (loop_vinfo->loop);
191 if (!SSA_NAME_IS_DEFAULT_DEF (var))
193 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
194 if (bb == NULL
195 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
196 e = NULL;
199 return e;
202 /* Return true if the target supports a vector version of CODE,
203 where CODE is known to map to a direct optab with the given SUBTYPE.
204 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
205 specifies the type of the scalar result.
 207    If CODE allows the inputs and outputs to have different types
208 (such as for WIDEN_SUM_EXPR), it is the input mode rather
209 than the output mode that determines the appropriate target pattern.
210 Operand 0 of the target pattern then specifies the mode that the output
211 must have.
213 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
214 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
215 is nonnull. */
217 static bool
218 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
219 tree itype, tree *vecotype_out,
220 tree *vecitype_out = NULL,
221 enum optab_subtype subtype = optab_default)
223 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
224 if (!vecitype)
225 return false;
227 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
228 if (!vecotype)
229 return false;
231 optab optab = optab_for_tree_code (code, vecitype, subtype);
232 if (!optab)
233 return false;
235 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
236 if (icode == CODE_FOR_nothing
237 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
238 return false;
240 *vecotype_out = vecotype;
241 if (vecitype_out)
242 *vecitype_out = vecitype;
243 return true;
246 /* Round bit precision PRECISION up to a full element. */
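/* For instance (illustration only, not part of the original file): a
   requirement of 3 bits rounds up to BITS_PER_UNIT (8 on most targets),
   and a requirement of 12 bits rounds up to 16, since
   1 << ceil_log2 (12) == 16.  */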
248 static unsigned int
249 vect_element_precision (unsigned int precision)
251 precision = 1 << ceil_log2 (precision);
252 return MAX (precision, BITS_PER_UNIT);
255 /* If OP is defined by a statement that's being considered for vectorization,
256 return information about that statement, otherwise return NULL. */
258 static stmt_vec_info
259 vect_get_internal_def (vec_info *vinfo, tree op)
261 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
262 if (def_stmt_info
263 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
264 return def_stmt_info;
265 return NULL;
268 /* Check whether NAME, an ssa-name used in STMT_VINFO,
269 is a result of a type promotion, such that:
270 DEF_STMT: NAME = NOP (name0)
271 If CHECK_SIGN is TRUE, check that either both types are signed or both are
272 unsigned. */
274 static bool
275 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
276 tree *orig_type, gimple **def_stmt, bool *promotion)
278 tree type = TREE_TYPE (name);
279 tree oprnd0;
280 enum vect_def_type dt;
282 stmt_vec_info def_stmt_info;
283 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
284 return false;
286 if (dt != vect_internal_def
287 && dt != vect_external_def && dt != vect_constant_def)
288 return false;
290 if (!*def_stmt)
291 return false;
293 if (!is_gimple_assign (*def_stmt))
294 return false;
296 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
297 return false;
299 oprnd0 = gimple_assign_rhs1 (*def_stmt);
301 *orig_type = TREE_TYPE (oprnd0);
302 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
303 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
304 return false;
306 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
307 *promotion = true;
308 else
309 *promotion = false;
311 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
312 return false;
314 return true;
317 /* Holds information about an input operand after some sign changes
318 and type promotions have been peeled away. */
319 class vect_unpromoted_value {
320 public:
321 vect_unpromoted_value ();
323 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
325 /* The value obtained after peeling away zero or more casts. */
326 tree op;
328 /* The type of OP. */
329 tree type;
331 /* The definition type of OP. */
332 vect_def_type dt;
334 /* If OP is the result of peeling at least one cast, and if the cast
335 of OP itself is a vectorizable statement, CASTER identifies that
336 statement, otherwise it is null. */
337 stmt_vec_info caster;
340 inline vect_unpromoted_value::vect_unpromoted_value ()
341 : op (NULL_TREE),
342 type (NULL_TREE),
343 dt (vect_uninitialized_def),
344 caster (NULL)
348 /* Set the operand to OP_IN, its definition type to DT_IN, and the
349 statement that casts it to CASTER_IN. */
351 inline void
352 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
353 stmt_vec_info caster_in)
355 op = op_in;
356 type = TREE_TYPE (op);
357 dt = dt_in;
358 caster = caster_in;
361 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
362 to reach some vectorizable inner operand OP', continuing as long as it
363 is possible to convert OP' back to OP using a possible sign change
364 followed by a possible promotion P. Return this OP', or null if OP is
365 not a vectorizable SSA name. If there is a promotion P, describe its
366 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
367 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
368 have more than one user.
370 A successful return means that it is possible to go from OP' to OP
371 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
372 whereas the cast from UNPROM to OP might be a promotion, a sign
373 change, or a nop.
375 E.g. say we have:
377 signed short *ptr = ...;
378 signed short C = *ptr;
379 unsigned short B = (unsigned short) C; // sign change
380 signed int A = (signed int) B; // unsigned promotion
381 ...possible other uses of A...
382 unsigned int OP = (unsigned int) A; // sign change
384 In this case it's possible to go directly from C to OP using:
386 OP = (unsigned int) (unsigned short) C;
387 +------------+ +--------------+
388 promotion sign change
390 so OP' would be C. The input to the promotion is B, so UNPROM
391 would describe B. */
393 static tree
394 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
395 vect_unpromoted_value *unprom,
396 bool *single_use_p = NULL)
398 tree res = NULL_TREE;
399 tree op_type = TREE_TYPE (op);
400 unsigned int orig_precision = TYPE_PRECISION (op_type);
401 unsigned int min_precision = orig_precision;
402 stmt_vec_info caster = NULL;
403 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
405 /* See whether OP is simple enough to vectorize. */
406 stmt_vec_info def_stmt_info;
407 gimple *def_stmt;
408 vect_def_type dt;
409 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
410 break;
412 /* If OP is the input of a demotion, skip over it to see whether
413 OP is itself the result of a promotion. If so, the combined
414 effect of the promotion and the demotion might fit the required
415 pattern, otherwise neither operation fits.
417 This copes with cases such as the result of an arithmetic
418 operation being truncated before being stored, and where that
419 arithmetic operation has been recognized as an over-widened one. */
420 if (TYPE_PRECISION (op_type) <= min_precision)
422 /* Use OP as the UNPROM described above if we haven't yet
423 found a promotion, or if using the new input preserves the
424 sign of the previous promotion. */
425 if (!res
426 || TYPE_PRECISION (unprom->type) == orig_precision
427 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
429 unprom->set_op (op, dt, caster);
430 min_precision = TYPE_PRECISION (op_type);
432 /* Stop if we've already seen a promotion and if this
433 conversion does more than change the sign. */
434 else if (TYPE_PRECISION (op_type)
435 != TYPE_PRECISION (unprom->type))
436 break;
438 /* The sequence now extends to OP. */
439 res = op;
442 /* See whether OP is defined by a cast. Record it as CASTER if
443 the cast is potentially vectorizable. */
444 if (!def_stmt)
445 break;
446 caster = def_stmt_info;
448 /* Ignore pattern statements, since we don't link uses for them. */
449 if (caster
450 && single_use_p
451 && !STMT_VINFO_RELATED_STMT (caster)
452 && !has_single_use (res))
453 *single_use_p = false;
455 gassign *assign = dyn_cast <gassign *> (def_stmt);
456 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
457 break;
459 /* Continue with the input to the cast. */
460 op = gimple_assign_rhs1 (def_stmt);
461 op_type = TREE_TYPE (op);
463 return res;
466 /* OP is an integer operand to an operation that returns TYPE, and we
467 want to treat the operation as a widening one. So far we can treat
468 it as widening from *COMMON_TYPE.
470 Return true if OP is suitable for such a widening operation,
471 either widening from *COMMON_TYPE or from some supertype of it.
472 Update *COMMON_TYPE to the supertype in the latter case.
474 SHIFT_P is true if OP is a shift amount. */
476 static bool
477 vect_joust_widened_integer (tree type, bool shift_p, tree op,
478 tree *common_type)
480 /* Calculate the minimum precision required by OP, without changing
481 the sign of either operand. */
482 unsigned int precision;
483 if (shift_p)
485 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
486 return false;
487 precision = TREE_INT_CST_LOW (op);
489 else
491 precision = wi::min_precision (wi::to_widest (op),
492 TYPE_SIGN (*common_type));
493 if (precision * 2 > TYPE_PRECISION (type))
494 return false;
497 /* If OP requires a wider type, switch to that type. The checks
498 above ensure that this is still narrower than the result. */
499 precision = vect_element_precision (precision);
500 if (TYPE_PRECISION (*common_type) < precision)
501 *common_type = build_nonstandard_integer_type
502 (precision, TYPE_UNSIGNED (*common_type));
503 return true;
506 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
 507    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
509 static bool
510 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
512 if (types_compatible_p (*common_type, new_type))
513 return true;
515 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
516 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
517 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
518 return true;
520 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
521 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
522 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
524 *common_type = new_type;
525 return true;
528 /* We have mismatched signs, with the signed type being
529 no wider than the unsigned type. In this case we need
530 a wider signed type. */
531 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
532 TYPE_PRECISION (new_type));
533 precision *= 2;
535 if (precision * 2 > TYPE_PRECISION (type))
536 return false;
538 *common_type = build_nonstandard_integer_type (precision, false);
539 return true;
542 /* Check whether STMT_INFO can be viewed as a tree of integer operations
543 in which each node either performs CODE or WIDENED_CODE, and where
544 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
545 specifies the maximum number of leaf operands. SHIFT_P says whether
546 CODE and WIDENED_CODE are some sort of shift.
548 If STMT_INFO is such a tree, return the number of leaf operands
549 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
550 to a type that (a) is narrower than the result of STMT_INFO and
551 (b) can hold all leaf operand values.
 553    If SUBTYPE is nonnull, allow the signs of the operands to
 554    differ, but not their precision.  SUBTYPE is updated to reflect
 555    this.
557 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
558 exists. */
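/* For illustration (hypothetical example, not part of the original file):
   with CODE = PLUS_EXPR, WIDENED_CODE = WIDEN_PLUS_EXPR and MAX_NOPS = 4,
   a tree such as

     int t1 = (int) a + (int) b;      a, b, c, d of type unsigned char
     int t2 = (int) c + (int) d;
     int res = t1 + t2;

   would return 4, describing a, b, c and d in UNPROM[0..3] and setting
   *COMMON_TYPE to a type such as unsigned char that can hold all the
   leaf values.  */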
560 static unsigned int
561 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
562 tree_code widened_code, bool shift_p,
563 unsigned int max_nops,
564 vect_unpromoted_value *unprom, tree *common_type,
565 enum optab_subtype *subtype = NULL)
567 /* Check for an integer operation with the right code. */
568 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
569 if (!assign)
570 return 0;
572 tree_code rhs_code = gimple_assign_rhs_code (assign);
573 if (rhs_code != code && rhs_code != widened_code)
574 return 0;
576 tree type = TREE_TYPE (gimple_assign_lhs (assign));
577 if (!INTEGRAL_TYPE_P (type))
578 return 0;
580 /* Assume that both operands will be leaf operands. */
581 max_nops -= 2;
583 /* Check the operands. */
584 unsigned int next_op = 0;
585 for (unsigned int i = 0; i < 2; ++i)
587 vect_unpromoted_value *this_unprom = &unprom[next_op];
588 unsigned int nops = 1;
589 tree op = gimple_op (assign, i + 1);
590 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
592 /* We already have a common type from earlier operands.
593 Update it to account for OP. */
594 this_unprom->set_op (op, vect_constant_def);
595 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
596 return 0;
598 else
600 /* Only allow shifts by constants. */
601 if (shift_p && i == 1)
602 return 0;
604 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
605 return 0;
607 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
609 /* The operand isn't widened. If STMT_INFO has the code
610 for an unwidened operation, recursively check whether
611 this operand is a node of the tree. */
612 if (rhs_code != code
613 || max_nops == 0
614 || this_unprom->dt != vect_internal_def)
615 return 0;
617 /* Give back the leaf slot allocated above now that we're
618 not treating this as a leaf operand. */
619 max_nops += 1;
621 /* Recursively process the definition of the operand. */
622 stmt_vec_info def_stmt_info
623 = vinfo->lookup_def (this_unprom->op);
624 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
625 widened_code, shift_p, max_nops,
626 this_unprom, common_type,
627 subtype);
628 if (nops == 0)
629 return 0;
631 max_nops -= nops;
633 else
635 /* Make sure that the operand is narrower than the result. */
636 if (TYPE_PRECISION (this_unprom->type) * 2
637 > TYPE_PRECISION (type))
638 return 0;
640 /* Update COMMON_TYPE for the new operand. */
641 if (i == 0)
642 *common_type = this_unprom->type;
643 else if (!vect_joust_widened_type (type, this_unprom->type,
644 common_type))
646 if (subtype)
648 /* See if we can sign extend the smaller type. */
649 if (TYPE_PRECISION (this_unprom->type)
650 > TYPE_PRECISION (*common_type))
651 *common_type = this_unprom->type;
652 *subtype = optab_vector_mixed_sign;
654 else
655 return 0;
659 next_op += nops;
661 return next_op;
 664 /* Helper to return a new temporary SSA name of TYPE for a pattern.  If STMT
 665    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */
667 static tree
668 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
670 return make_temp_ssa_name (type, stmt, "patt");
673 /* STMT2_INFO describes a type conversion that could be split into STMT1
674 followed by a version of STMT2_INFO that takes NEW_RHS as its first
675 input. Try to do this using pattern statements, returning true on
676 success. */
678 static bool
679 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
680 gimple *stmt1, tree vectype)
682 if (is_pattern_stmt_p (stmt2_info))
684 /* STMT2_INFO is part of a pattern. Get the statement to which
685 the pattern is attached. */
686 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
687 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
689 if (dump_enabled_p ())
690 dump_printf_loc (MSG_NOTE, vect_location,
691 "Splitting pattern statement: %G", stmt2_info->stmt);
693 /* Since STMT2_INFO is a pattern statement, we can change it
694 in-situ without worrying about changing the code for the
695 containing block. */
696 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
698 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
701 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
702 stmt2_info->stmt);
705 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
706 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
707 /* STMT2_INFO is the actual pattern statement. Add STMT1
708 to the end of the definition sequence. */
709 gimple_seq_add_stmt_without_update (def_seq, stmt1);
710 else
712 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
713 before it. */
714 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
715 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
717 return true;
719 else
721 /* STMT2_INFO doesn't yet have a pattern. Try to create a
722 two-statement pattern now. */
723 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
724 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
725 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
726 if (!lhs_vectype)
727 return false;
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_NOTE, vect_location,
731 "Splitting statement: %G", stmt2_info->stmt);
733 /* Add STMT1 as a singleton pattern definition sequence. */
734 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
735 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
736 gimple_seq_add_stmt_without_update (def_seq, stmt1);
738 /* Build the second of the two pattern statements. */
739 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
740 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
741 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
743 if (dump_enabled_p ())
745 dump_printf_loc (MSG_NOTE, vect_location,
746 "into pattern statements: %G", stmt1);
747 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
748 (gimple *) new_stmt2);
751 return true;
755 /* Convert UNPROM to TYPE and return the result, adding new statements
756 to STMT_INFO's pattern definition statements if no better way is
757 available. VECTYPE is the vector form of TYPE.
759 If SUBTYPE then convert the type based on the subtype. */
761 static tree
762 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
763 vect_unpromoted_value *unprom, tree vectype,
764 enum optab_subtype subtype = optab_default)
766 /* Update the type if the signs differ. */
767 if (subtype == optab_vector_mixed_sign)
769 gcc_assert (!TYPE_UNSIGNED (type));
770 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
772 type = unsigned_type_for (type);
773 vectype = unsigned_type_for (vectype);
777 /* Check for a no-op conversion. */
778 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
779 return unprom->op;
781 /* Allow the caller to create constant vect_unpromoted_values. */
782 if (TREE_CODE (unprom->op) == INTEGER_CST)
783 return wide_int_to_tree (type, wi::to_widest (unprom->op));
785 tree input = unprom->op;
786 if (unprom->caster)
788 tree lhs = gimple_get_lhs (unprom->caster->stmt);
789 tree lhs_type = TREE_TYPE (lhs);
791 /* If the result of the existing cast is the right width, use it
792 instead of the source of the cast. */
793 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
794 input = lhs;
795 /* If the precision we want is between the source and result
796 precisions of the existing cast, try splitting the cast into
797 two and tapping into a mid-way point. */
798 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
799 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
801 /* In order to preserve the semantics of the original cast,
802 give the mid-way point the same signedness as the input value.
804 It would be possible to use a signed type here instead if
805 TYPE is signed and UNPROM->TYPE is unsigned, but that would
806 make the sign of the midtype sensitive to the order in
807 which we process the statements, since the signedness of
808 TYPE is the signedness required by just one of possibly
809 many users. Also, unsigned promotions are usually as cheap
810 as or cheaper than signed ones, so it's better to keep an
811 unsigned promotion. */
812 tree midtype = build_nonstandard_integer_type
813 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
814 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
815 if (vec_midtype)
817 input = vect_recog_temp_ssa_var (midtype, NULL);
818 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
819 unprom->op);
820 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
821 vec_midtype))
822 append_pattern_def_seq (vinfo, stmt_info,
823 new_stmt, vec_midtype);
827 /* See if we can reuse an existing result. */
828 if (types_compatible_p (type, TREE_TYPE (input)))
829 return input;
832 /* We need a new conversion statement. */
833 tree new_op = vect_recog_temp_ssa_var (type, NULL);
834 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
836 /* If OP is an external value, see if we can insert the new statement
837 on an incoming edge. */
838 if (input == unprom->op && unprom->dt == vect_external_def)
839 if (edge e = vect_get_external_def_edge (vinfo, input))
841 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
842 gcc_assert (!new_bb);
843 return new_op;
846 /* As a (common) last resort, add the statement to the pattern itself. */
847 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
848 return new_op;
851 /* Invoke vect_convert_input for N elements of UNPROM and store the
852 result in the corresponding elements of RESULT.
854 If SUBTYPE then convert the type based on the subtype. */
856 static void
857 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
858 tree *result, tree type, vect_unpromoted_value *unprom,
859 tree vectype, enum optab_subtype subtype = optab_default)
861 for (unsigned int i = 0; i < n; ++i)
863 unsigned int j;
864 for (j = 0; j < i; ++j)
865 if (unprom[j].op == unprom[i].op)
866 break;
868 if (j < i)
869 result[i] = result[j];
870 else
871 result[i] = vect_convert_input (vinfo, stmt_info,
872 type, &unprom[i], vectype, subtype);
876 /* The caller has created a (possibly empty) sequence of pattern definition
877 statements followed by a single statement PATTERN_STMT. Cast the result
878 of this final statement to TYPE. If a new statement is needed, add
879 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
880 and return the new statement, otherwise return PATTERN_STMT as-is.
881 VECITYPE is the vector form of PATTERN_STMT's result type. */
883 static gimple *
884 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
885 gimple *pattern_stmt, tree vecitype)
887 tree lhs = gimple_get_lhs (pattern_stmt);
888 if (!types_compatible_p (type, TREE_TYPE (lhs)))
890 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
891 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
892 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
894 return pattern_stmt;
897 /* Return true if STMT_VINFO describes a reduction for which reassociation
898 is allowed. If STMT_INFO is part of a group, assume that it's part of
899 a reduction chain and optimistically assume that all statements
900 except the last allow reassociation.
901 Also require it to have code CODE and to be a reduction
902 in the outermost loop. When returning true, store the operands in
903 *OP0_OUT and *OP1_OUT. */
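/* For instance (illustration only, not part of the original file), an
   integer accumulation such as sum += x[i] in the outermost loop normally
   satisfies this check, whereas a float accumulation without
   -fassociative-math needs a fold-left reduction and is rejected here.  */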
905 static bool
906 vect_reassociating_reduction_p (vec_info *vinfo,
907 stmt_vec_info stmt_info, tree_code code,
908 tree *op0_out, tree *op1_out)
910 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
911 if (!loop_info)
912 return false;
914 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
915 if (!assign || gimple_assign_rhs_code (assign) != code)
916 return false;
918 /* We don't allow changing the order of the computation in the inner-loop
919 when doing outer-loop vectorization. */
920 class loop *loop = LOOP_VINFO_LOOP (loop_info);
921 if (loop && nested_in_vect_loop_p (loop, stmt_info))
922 return false;
924 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
926 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
927 code))
928 return false;
930 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
931 return false;
933 *op0_out = gimple_assign_rhs1 (assign);
934 *op1_out = gimple_assign_rhs2 (assign);
935 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
936 std::swap (*op0_out, *op1_out);
937 return true;
940 /* match.pd function to match
941 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
942 with conditions:
 943    1) @1, @2, c, d, a, b are all of integral type.
 944    2) @1 and @2 each have a single use.
 945    3) a and c have the same precision.
 946    4) c and @1 have different precision.
 947    5) c and d have the same type, or they differ only in sign when the
 948       conversion is a truncation.
 950    Record a, c, d and @3.  */
952 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
954 /* Function vect_recog_cond_expr_convert
956 Try to find the following pattern:
958 TYPE_AB A,B;
959 TYPE_CD C,D;
960 TYPE_E E;
961 TYPE_E op_true = (TYPE_E) A;
962 TYPE_E op_false = (TYPE_E) B;
964 E = C cmp D ? op_true : op_false;
966 where
967 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
968 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
969 single_use of op_true and op_false.
970 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
972 Input:
974 * STMT_VINFO: The stmt from which the pattern search begins.
975 here it starts with E = c cmp D ? op_true : op_false;
977 Output:
979 TYPE1 E' = C cmp D ? A : B;
980 TYPE3 E = (TYPE3) E';
 982    There may be an extra nop_convert for A or B to handle different signedness.
984 * TYPE_OUT: The vector type of the output of this pattern.
986 * Return value: A new stmt that will be used to replace the sequence of
987 stmts that constitute the pattern. In this case it will be:
988 E = (TYPE3)E';
989 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
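/* An illustrative scalar loop (hypothetical, not from the original file),
   with 32-bit compares selecting between 32-bit values that are widened
   into a 64-bit result:

     void
     example_cond_convert (long long *e, int *c, int *d, int *a, int *b,
                           int n)
     {
       for (int i = 0; i < n; i++)
         e[i] = c[i] < d[i] ? (long long) a[i] : (long long) b[i];
     }

   The COND_EXPR is then performed in the narrower 32-bit type and only
   the selected result is widened to 64 bits.  */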
991 static gimple *
992 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
993 stmt_vec_info stmt_vinfo, tree *type_out)
995 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
996 tree lhs, match[4], temp, type, new_lhs, op2;
997 gimple *cond_stmt;
998 gimple *pattern_stmt;
1000 if (!last_stmt)
1001 return NULL;
1003 lhs = gimple_assign_lhs (last_stmt);
 1005   /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1006 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1007 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1008 return NULL;
1010 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1012 op2 = match[2];
1013 type = TREE_TYPE (match[1]);
1014 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1016 op2 = vect_recog_temp_ssa_var (type, NULL);
1017 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1018 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1019 get_vectype_for_scalar_type (vinfo, type));
1022 temp = vect_recog_temp_ssa_var (type, NULL);
1023 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1024 match[1], op2));
1025 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1026 get_vectype_for_scalar_type (vinfo, type));
1027 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1028 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1029 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1031 if (dump_enabled_p ())
1032 dump_printf_loc (MSG_NOTE, vect_location,
1033 "created pattern stmt: %G", pattern_stmt);
1034 return pattern_stmt;
1037 /* Function vect_recog_dot_prod_pattern
1039 Try to find the following pattern:
 1041      type1a x_t;
1042 type1b y_t;
1043 TYPE1 prod;
1044 TYPE2 sum = init;
1045 loop:
1046 sum_0 = phi <init, sum_1>
1047 S1 x_t = ...
1048 S2 y_t = ...
1049 S3 x_T = (TYPE1) x_t;
1050 S4 y_T = (TYPE1) y_t;
1051 S5 prod = x_T * y_T;
1052 [S6 prod = (TYPE2) prod; #optional]
1053 S7 sum_1 = prod + sum_0;
 1055   where 'TYPE1' is exactly double the size of 'type1a' and 'type1b';
 1056   the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
 1057   while 'type1a' and 'type1b' may differ in sign from each other.
1059 Input:
1061 * STMT_VINFO: The stmt from which the pattern search begins. In the
1062 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1063 will be detected.
1065 Output:
1067 * TYPE_OUT: The type of the output of this pattern.
1069 * Return value: A new stmt that will be used to replace the sequence of
1070 stmts that constitute the pattern. In this case it will be:
1071 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1073 Note: The dot-prod idiom is a widening reduction pattern that is
1074 vectorized without preserving all the intermediate results. It
1075 produces only N/2 (widened) results (by summing up pairs of
1076 intermediate results) rather than all N results. Therefore, we
1077 cannot allow this pattern when we want to get all the results and in
1078 the correct order (as is the case when this computation is in an
 1079   inner-loop nested in an outer-loop that is being vectorized).  */
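/* An illustrative scalar loop (hypothetical, not from the original file)
   of the shape this pattern targets, with signed char inputs accumulated
   into an int:

     int
     example_dot_prod (signed char *x, signed char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];
       return sum;
     }

   On targets that provide DOT_PROD_EXPR for these types, each vector
   operation multiplies narrow elements and accumulates the products
   into wider lanes of the sum vector.  */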
1081 static gimple *
1082 vect_recog_dot_prod_pattern (vec_info *vinfo,
1083 stmt_vec_info stmt_vinfo, tree *type_out)
1085 tree oprnd0, oprnd1;
1086 gimple *last_stmt = stmt_vinfo->stmt;
1087 tree type, half_type;
1088 gimple *pattern_stmt;
1089 tree var;
1091 /* Look for the following pattern
1092 DX = (TYPE1) X;
1093 DY = (TYPE1) Y;
1094 DPROD = DX * DY;
1095 DDPROD = (TYPE2) DPROD;
1096 sum_1 = DDPROD + sum_0;
1097 In which
1098 - DX is double the size of X
1099 - DY is double the size of Y
1100 - DX, DY, DPROD all have the same type but the sign
1101 between X, Y and DPROD can differ.
 1102      - sum is the same size as DPROD or bigger
1103 - sum has been recognized as a reduction variable.
1105 This is equivalent to:
1106 DPROD = X w* Y; #widen mult
1107 sum_1 = DPROD w+ sum_0; #widen summation
1109 DPROD = X w* Y; #widen mult
1110 sum_1 = DPROD + sum_0; #summation
1113 /* Starting from LAST_STMT, follow the defs of its uses in search
1114 of the above pattern. */
1116 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1117 &oprnd0, &oprnd1))
1118 return NULL;
1120 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1122 vect_unpromoted_value unprom_mult;
1123 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1125 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1126 we know that oprnd1 is the reduction variable (defined by a loop-header
1127 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1128 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1129 if (!oprnd0)
1130 return NULL;
1132 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1133 if (!mult_vinfo)
1134 return NULL;
 1136   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1137 inside the loop (in case we are analyzing an outer-loop). */
1138 vect_unpromoted_value unprom0[2];
1139 enum optab_subtype subtype = optab_vector;
1140 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1141 false, 2, unprom0, &half_type, &subtype))
1142 return NULL;
1144 /* If there are two widening operations, make sure they agree on the sign
1145 of the extension. The result of an optab_vector_mixed_sign operation
1146 is signed; otherwise, the result has the same sign as the operands. */
1147 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1148 && (subtype == optab_vector_mixed_sign
1149 ? TYPE_UNSIGNED (unprom_mult.type)
1150 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1151 return NULL;
1153 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1155 /* If the inputs have mixed signs, canonicalize on using the signed
1156 input type for analysis. This also helps when emulating mixed-sign
1157 operations using signed operations. */
1158 if (subtype == optab_vector_mixed_sign)
1159 half_type = signed_type_for (half_type);
1161 tree half_vectype;
1162 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1163 type_out, &half_vectype, subtype))
1165 /* We can emulate a mixed-sign dot-product using a sequence of
1166 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1167 if (subtype != optab_vector_mixed_sign
1168 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1169 DOT_PROD_EXPR, half_type,
1170 type_out, &half_vectype,
1171 optab_vector))
1172 return NULL;
1174 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1175 *type_out);
1178 /* Get the inputs in the appropriate types. */
1179 tree mult_oprnd[2];
1180 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1181 unprom0, half_vectype, subtype);
1183 var = vect_recog_temp_ssa_var (type, NULL);
1184 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1185 mult_oprnd[0], mult_oprnd[1], oprnd1);
1187 return pattern_stmt;
1191 /* Function vect_recog_sad_pattern
1193 Try to find the following Sum of Absolute Difference (SAD) pattern:
1195 type x_t, y_t;
1196 signed TYPE1 diff, abs_diff;
1197 TYPE2 sum = init;
1198 loop:
1199 sum_0 = phi <init, sum_1>
1200 S1 x_t = ...
1201 S2 y_t = ...
1202 S3 x_T = (TYPE1) x_t;
1203 S4 y_T = (TYPE1) y_t;
1204 S5 diff = x_T - y_T;
1205 S6 abs_diff = ABS_EXPR <diff>;
1206 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1207 S8 sum_1 = abs_diff + sum_0;
1209 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
 1210   same size as 'TYPE1' or bigger. This is a special case of a reduction
1211 computation.
1213 Input:
1215 * STMT_VINFO: The stmt from which the pattern search begins. In the
1216 example, when this function is called with S8, the pattern
1217 {S3,S4,S5,S6,S7,S8} will be detected.
1219 Output:
1221 * TYPE_OUT: The type of the output of this pattern.
1223 * Return value: A new stmt that will be used to replace the sequence of
1224 stmts that constitute the pattern. In this case it will be:
 1225         SAD_EXPR <x_t, y_t, sum_0>  */
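/* An illustrative scalar loop (hypothetical, not from the original file),
   where the unsigned char inputs are promoted to int by the usual
   arithmetic conversions before the subtraction:

     int
     example_sad (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += __builtin_abs (x[i] - y[i]);
       return sum;
     }

   On targets that provide SAD_EXPR for these types this becomes a single
   sum-of-absolute-differences accumulation per vector iteration.  */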
1228 static gimple *
1229 vect_recog_sad_pattern (vec_info *vinfo,
1230 stmt_vec_info stmt_vinfo, tree *type_out)
1232 gimple *last_stmt = stmt_vinfo->stmt;
1233 tree half_type;
1235 /* Look for the following pattern
1236 DX = (TYPE1) X;
1237 DY = (TYPE1) Y;
1238 DDIFF = DX - DY;
1239 DAD = ABS_EXPR <DDIFF>;
 1240      DAD = (TYPE2) DAD;    #optional
1241 sum_1 = DAD + sum_0;
1242 In which
1243 - DX is at least double the size of X
1244 - DY is at least double the size of Y
1245 - DX, DY, DDIFF, DAD all have the same type
 1246      - sum is the same size as DAD or bigger
1247 - sum has been recognized as a reduction variable.
1249 This is equivalent to:
1250 DDIFF = X w- Y; #widen sub
1251 DAD = ABS_EXPR <DDIFF>;
1252 sum_1 = DAD w+ sum_0; #widen summation
1254 DDIFF = X w- Y; #widen sub
1255 DAD = ABS_EXPR <DDIFF>;
1256 sum_1 = DAD + sum_0; #summation
1259 /* Starting from LAST_STMT, follow the defs of its uses in search
1260 of the above pattern. */
1262 tree plus_oprnd0, plus_oprnd1;
1263 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1264 &plus_oprnd0, &plus_oprnd1))
1265 return NULL;
1267 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1269 /* Any non-truncating sequence of conversions is OK here, since
1270 with a successful match, the result of the ABS(U) is known to fit
1271 within the nonnegative range of the result type. (It cannot be the
1272 negative of the minimum signed value due to the range of the widening
1273 MINUS_EXPR.) */
1274 vect_unpromoted_value unprom_abs;
1275 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1276 &unprom_abs);
1278 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1279 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1280 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1281 Then check that plus_oprnd0 is defined by an abs_expr. */
1283 if (!plus_oprnd0)
1284 return NULL;
1286 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1287 if (!abs_stmt_vinfo)
1288 return NULL;
 1290   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1291 inside the loop (in case we are analyzing an outer-loop). */
1292 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1293 if (!abs_stmt
1294 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1295 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1296 return NULL;
1298 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1299 tree abs_type = TREE_TYPE (abs_oprnd);
1300 if (TYPE_UNSIGNED (abs_type))
1301 return NULL;
1303 /* Peel off conversions from the ABS input. This can involve sign
1304 changes (e.g. from an unsigned subtraction to a signed ABS input)
1305 or signed promotion, but it can't include unsigned promotion.
1306 (Note that ABS of an unsigned promotion should have been folded
1307 away before now anyway.) */
1308 vect_unpromoted_value unprom_diff;
1309 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1310 &unprom_diff);
1311 if (!abs_oprnd)
1312 return NULL;
1313 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1314 && TYPE_UNSIGNED (unprom_diff.type))
1315 return NULL;
1317 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1318 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1319 if (!diff_stmt_vinfo)
1320 return NULL;
 1322   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1323 inside the loop (in case we are analyzing an outer-loop). */
1324 vect_unpromoted_value unprom[2];
1325 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1326 false, 2, unprom, &half_type))
1327 return NULL;
1329 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1331 tree half_vectype;
1332 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1333 type_out, &half_vectype))
1334 return NULL;
1336 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1337 tree sad_oprnd[2];
1338 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1339 unprom, half_vectype);
1341 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1342 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1343 sad_oprnd[1], plus_oprnd1);
1345 return pattern_stmt;
1348 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1349 so that it can be treated as though it had the form:
1351 A_TYPE a;
1352 B_TYPE b;
1353 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1354 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1355 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1356 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1357 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1359 Try to replace the pattern with:
1361 A_TYPE a;
1362 B_TYPE b;
1363 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1364 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1365 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1366 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1368 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1370 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1371 name of the pattern being matched, for dump purposes. */
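/* For illustration (hypothetical, not from the original file), with
   ORIG_CODE = MULT_EXPR and WIDE_CODE = WIDEN_MULT_EXPR the shape above
   corresponds to a scalar loop such as:

     void
     example_widen_mult (int *res, short *a, short *b, int n)
     {
       for (int i = 0; i < n; i++)
         res[i] = a[i] * b[i];
     }

   where the multiplication can be done as a widening operation on the
   short inputs instead of first extending both operands to full ints.  */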
1373 static gimple *
1374 vect_recog_widen_op_pattern (vec_info *vinfo,
1375 stmt_vec_info last_stmt_info, tree *type_out,
1376 tree_code orig_code, tree_code wide_code,
1377 bool shift_p, const char *name)
1379 gimple *last_stmt = last_stmt_info->stmt;
1381 vect_unpromoted_value unprom[2];
1382 tree half_type;
1383 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1384 shift_p, 2, unprom, &half_type))
1385 return NULL;
1387 /* Pattern detected. */
1388 vect_pattern_detected (name, last_stmt);
1390 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1391 tree itype = type;
1392 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1393 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1394 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1395 TYPE_UNSIGNED (half_type));
1397 /* Check target support */
1398 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1399 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1400 tree ctype = itype;
1401 tree vecctype = vecitype;
1402 if (orig_code == MINUS_EXPR
1403 && TYPE_UNSIGNED (itype)
1404 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1406 /* Subtraction is special, even if half_type is unsigned and no matter
1407 whether type is signed or unsigned, if type is wider than itype,
1408 we need to sign-extend from the widening operation result to the
1409 result type.
1410 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1411 itype unsigned short and type either int or unsigned int.
1412 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1413 (unsigned short) 0xffff, but for type int we want the result -1
1414 and for type unsigned int 0xffffffff rather than 0xffff. */
1415 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1416 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1419 enum tree_code dummy_code;
1420 int dummy_int;
1421 auto_vec<tree> dummy_vec;
1422 if (!vectype
1423 || !vecitype
1424 || !vecctype
1425 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1426 vecitype, vectype,
1427 &dummy_code, &dummy_code,
1428 &dummy_int, &dummy_vec))
1429 return NULL;
1431 *type_out = get_vectype_for_scalar_type (vinfo, type);
1432 if (!*type_out)
1433 return NULL;
1435 tree oprnd[2];
1436 vect_convert_inputs (vinfo, last_stmt_info,
1437 2, oprnd, half_type, unprom, vectype);
1439 tree var = vect_recog_temp_ssa_var (itype, NULL);
1440 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1441 oprnd[0], oprnd[1]);
1443 if (vecctype != vecitype)
1444 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1445 pattern_stmt, vecitype);
1447 return vect_convert_output (vinfo, last_stmt_info,
1448 type, pattern_stmt, vecctype);
1451 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1452 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1454 static gimple *
1455 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1456 tree *type_out)
1458 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1459 MULT_EXPR, WIDEN_MULT_EXPR, false,
1460 "vect_recog_widen_mult_pattern");
1463 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1464 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1466 static gimple *
1467 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1468 tree *type_out)
1470 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1471 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1472 "vect_recog_widen_plus_pattern");
1475 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1476 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1477 static gimple *
1478 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1479 tree *type_out)
1481 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1482 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1483 "vect_recog_widen_minus_pattern");
1486 /* Function vect_recog_popcount_pattern
1488 Try to find the following pattern:
1490 UTYPE1 A;
1491 TYPE1 B;
1492 UTYPE2 temp_in;
1493 TYPE3 temp_out;
1494 temp_in = (UTYPE2)A;
1496 temp_out = __builtin_popcount{,l,ll} (temp_in);
1497 B = (TYPE1) temp_out;
 1499   UTYPE2 may or may not be equal to TYPE3,
 1500   e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount
 1501   but not equal to TYPE3 for __builtin_popcountll.
1503 Input:
1505 * STMT_VINFO: The stmt from which the pattern search begins.
1506 here it starts with B = (TYPE1) temp_out;
1508 Output:
1510 * TYPE_OUT: The vector type of the output of this pattern.
1512 * Return value: A new stmt that will be used to replace the sequence of
1513 stmts that constitute the pattern. In this case it will be:
 1514   B = .POPCOUNT (A);  */
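/* An illustrative scalar loop (hypothetical, not from the original file),
   where the 32-bit input is promoted to the 64-bit argument type of
   __builtin_popcountll and the result is converted back to 32 bits:

     void
     example_popcount (unsigned int *in, unsigned int *out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = __builtin_popcountll (in[i]);
     }

   This is replaced by a 32-bit .POPCOUNT when the target provides a
   popcount pattern for the chosen vector mode.  */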
1517 static gimple *
1518 vect_recog_popcount_pattern (vec_info *vinfo,
1519 stmt_vec_info stmt_vinfo, tree *type_out)
1521 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1522 gimple *popcount_stmt, *pattern_stmt;
1523 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1524 auto_vec<tree> vargs;
1526 /* Find B = (TYPE1) temp_out. */
1527 if (!last_stmt)
1528 return NULL;
1529 tree_code code = gimple_assign_rhs_code (last_stmt);
1530 if (!CONVERT_EXPR_CODE_P (code))
1531 return NULL;
1533 lhs_oprnd = gimple_assign_lhs (last_stmt);
1534 lhs_type = TREE_TYPE (lhs_oprnd);
1535 if (!INTEGRAL_TYPE_P (lhs_type))
1536 return NULL;
1538 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1539 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1540 || !has_single_use (rhs_oprnd))
1541 return NULL;
1542 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1544 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1545 if (!is_gimple_call (popcount_stmt))
1546 return NULL;
1547 switch (gimple_call_combined_fn (popcount_stmt))
1549 CASE_CFN_POPCOUNT:
1550 break;
1551 default:
1552 return NULL;
1555 if (gimple_call_num_args (popcount_stmt) != 1)
1556 return NULL;
1558 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1559 vect_unpromoted_value unprom_diff;
1560 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1561 &unprom_diff);
1563 if (!rhs_origin)
1564 return NULL;
 1566   /* Input and output of .POPCOUNT should be integers of the same precision.
1567 Also A should be unsigned or same precision as temp_in,
1568 otherwise there would be sign_extend from A to temp_in. */
1569 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1570 || (!TYPE_UNSIGNED (unprom_diff.type)
1571 && (TYPE_PRECISION (unprom_diff.type)
1572 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1573 return NULL;
1574 vargs.safe_push (unprom_diff.op);
 1576   vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
1577 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1578 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1579 if (!vec_type
1580 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1581 OPTIMIZE_FOR_SPEED))
1582 return NULL;
1584 /* Create B = .POPCOUNT (A). */
1585 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1586 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1587 gimple_call_set_lhs (pattern_stmt, new_var);
1588 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1589 *type_out = vec_type;
1591 if (dump_enabled_p ())
1592 dump_printf_loc (MSG_NOTE, vect_location,
1593 "created pattern stmt: %G", pattern_stmt);
1594 return pattern_stmt;
1597 /* Function vect_recog_pow_pattern
1599 Try to find the following pattern:
1601 x = POW (y, N);
1603 with POW being one of pow, powf, powi, powif and N being
1604 either 2 or 0.5.
1606 Input:
1608 * STMT_VINFO: The stmt from which the pattern search begins.
1610 Output:
1612 * TYPE_OUT: The type of the output of this pattern.
1614 * Return value: A new stmt that will be used to replace the sequence of
1615 stmts that constitute the pattern. In this case it will be:
1616 x = x * x
 1618        x = sqrt (x)  */
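/* An illustrative scalar use (hypothetical, not from the original file):

     void
     example_pow (double *x, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = __builtin_pow (x[i], 2.0);
     }

   where the call is replaced by x[i] * x[i]; a constant exponent of 0.5
   is similarly turned into a (vector) square root when the target
   supports IFN_SQRT for the chosen vector type.  */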
1621 static gimple *
1622 vect_recog_pow_pattern (vec_info *vinfo,
1623 stmt_vec_info stmt_vinfo, tree *type_out)
1625 gimple *last_stmt = stmt_vinfo->stmt;
1626 tree base, exp;
1627 gimple *stmt;
1628 tree var;
1630 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1631 return NULL;
1633 switch (gimple_call_combined_fn (last_stmt))
1635 CASE_CFN_POW:
1636 CASE_CFN_POWI:
1637 break;
1639 default:
1640 return NULL;
1643 base = gimple_call_arg (last_stmt, 0);
1644 exp = gimple_call_arg (last_stmt, 1);
1645 if (TREE_CODE (exp) != REAL_CST
1646 && TREE_CODE (exp) != INTEGER_CST)
1648 if (flag_unsafe_math_optimizations
1649 && TREE_CODE (base) == REAL_CST
1650 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1652 combined_fn log_cfn;
1653 built_in_function exp_bfn;
1654 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1656 case BUILT_IN_POW:
1657 log_cfn = CFN_BUILT_IN_LOG;
1658 exp_bfn = BUILT_IN_EXP;
1659 break;
1660 case BUILT_IN_POWF:
1661 log_cfn = CFN_BUILT_IN_LOGF;
1662 exp_bfn = BUILT_IN_EXPF;
1663 break;
1664 case BUILT_IN_POWL:
1665 log_cfn = CFN_BUILT_IN_LOGL;
1666 exp_bfn = BUILT_IN_EXPL;
1667 break;
1668 default:
1669 return NULL;
1671 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1672 tree exp_decl = builtin_decl_implicit (exp_bfn);
1673 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1674 does that, but if C is a power of 2, we want to use
1675 exp2 (log2 (C) * x) in the non-vectorized version, but for
1676 vectorization we don't have vectorized exp2. */
1677 if (logc
1678 && TREE_CODE (logc) == REAL_CST
1679 && exp_decl
1680 && lookup_attribute ("omp declare simd",
1681 DECL_ATTRIBUTES (exp_decl)))
1683 cgraph_node *node = cgraph_node::get_create (exp_decl);
1684 if (node->simd_clones == NULL)
1686 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1687 || node->definition)
1688 return NULL;
1689 expand_simd_clones (node);
1690 if (node->simd_clones == NULL)
1691 return NULL;
1693 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1694 if (!*type_out)
1695 return NULL;
1696 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1697 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1698 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1699 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1700 g = gimple_build_call (exp_decl, 1, def);
1701 gimple_call_set_lhs (g, res);
1702 return g;
1706 return NULL;
1709 /* We now have a pow or powi builtin function call with a constant
1710 exponent. */
1712 /* Catch squaring. */
1713 if ((tree_fits_shwi_p (exp)
1714 && tree_to_shwi (exp) == 2)
1715 || (TREE_CODE (exp) == REAL_CST
1716 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1718 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1719 TREE_TYPE (base), type_out))
1720 return NULL;
1722 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1723 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1724 return stmt;
1727 /* Catch square root. */
1728 if (TREE_CODE (exp) == REAL_CST
1729 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1731 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1732 if (*type_out
1733 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1734 OPTIMIZE_FOR_SPEED))
1736 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1737 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1738 gimple_call_set_lhs (stmt, var);
1739 gimple_call_set_nothrow (stmt, true);
1740 return stmt;
1744 return NULL;
1748 /* Function vect_recog_widen_sum_pattern
1750 Try to find the following pattern:
1752 type x_t;
1753 TYPE x_T, sum = init;
1754 loop:
1755 sum_0 = phi <init, sum_1>
1756 S1 x_t = *p;
1757 S2 x_T = (TYPE) x_t;
1758 S3 sum_1 = x_T + sum_0;
1760 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1761 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1762 a special case of a reduction computation.
1764 Input:
1766 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1767 when this function is called with S3, the pattern {S2,S3} will be detected.
1769 Output:
1771 * TYPE_OUT: The type of the output of this pattern.
1773 * Return value: A new stmt that will be used to replace the sequence of
1774 stmts that constitute the pattern. In this case it will be:
1775 WIDEN_SUM <x_t, sum_0>
1777 Note: The widening-sum idiom is a widening reduction pattern that is
1778 vectorized without preserving all the intermediate results. It
1779 produces only N/2 (widened) results (by summing up pairs of
1780 intermediate results) rather than all N results. Therefore, we
1781 cannot allow this pattern when we want to get all the results and in
1782 the correct order (as is the case when this computation is in an
1783 inner-loop nested in an outer-loop that is being vectorized). */
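/* For instance, with 'type' = uint8_t and 'TYPE' = uint32_t:

       uint32_t sum = 0;
       for (int i = 0; i < n; i++)
         sum += (uint32_t) bytes[i];

   the widening conversion and the addition are replaced by a single
   WIDEN_SUM <bytes[i], sum_0>, assuming the target provides a widening-sum
   optab for these types.  */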
1785 static gimple *
1786 vect_recog_widen_sum_pattern (vec_info *vinfo,
1787 stmt_vec_info stmt_vinfo, tree *type_out)
1789 gimple *last_stmt = stmt_vinfo->stmt;
1790 tree oprnd0, oprnd1;
1791 tree type;
1792 gimple *pattern_stmt;
1793 tree var;
1795 /* Look for the following pattern
1796 DX = (TYPE) X;
1797 sum_1 = DX + sum_0;
1798 In which DX is at least double the size of X, and sum_1 has been
1799 recognized as a reduction variable.
1802 /* Starting from LAST_STMT, follow the defs of its uses in search
1803 of the above pattern. */
1805 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1806 &oprnd0, &oprnd1))
1807 return NULL;
1809 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1811 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1812 we know that oprnd1 is the reduction variable (defined by a loop-header
1813 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1814 Left to check that oprnd0 is defined by a cast from type 'type' to type
1815 'TYPE'. */
1817 vect_unpromoted_value unprom0;
1818 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1819 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1820 return NULL;
1822 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1824 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1825 unprom0.type, type_out))
1826 return NULL;
1828 var = vect_recog_temp_ssa_var (type, NULL);
1829 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1831 return pattern_stmt;
1834 /* Function vect_recog_bitfield_ref_pattern
1836 Try to find the following pattern:
1838 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
1839 result = (type_out) bf_value;
1841 where type_out is a non-bitfield type, that is to say, its precision matches
1842 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
1844 Input:
1846 * STMT_VINFO: The stmt from which the pattern search begins.
1847 here it starts with:
1848 result = (type_out) bf_value;
1850 Output:
1852 * TYPE_OUT: The vector type of the output of this pattern.
1854 * Return value: A new stmt that will be used to replace the sequence of
1855 stmts that constitute the pattern. If the precision of type_out is bigger
1856 than the precision of the container we perform the widening before the shifting,
1857 since the new precision will be large enough to shift the value and moving
1858 widening operations up the statement chain enables the generation of
1859 widening loads. If we are widening and the operation after the pattern is
1860 an addition then we mask first and shift later, to enable the generation of
1861 shifting adds. In the case of narrowing we will always mask first, shift
1862 last and then perform a narrowing operation. This will enable the
1863 generation of narrowing shifts.
1865 Widening with mask first, shift later:
1866 container = (type_out) container;
1867 masked = container & (((1 << bitsize) - 1) << bitpos);
1868 result = masked >> bitpos;
1870 Widening with shift first, mask last:
1871 container = (type_out) container;
1872 shifted = container >> bitpos;
1873 result = shifted & ((1 << bitsize) - 1);
1875 Narrowing:
1876 masked = container & (((1 << bitsize) - 1) << bitpos);
1877 result = masked >> bitpos;
1878 result = (type_out) result;
1880 The shift is only needed when bitpos != 0.
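/* As a concrete example, extracting a 5-bit field at bit position 3 from a
   32-bit container c (little-endian bit numbering) gives, with shift first:

       shifted = c >> 3;
       result  = shifted & 0x1f;

   and with mask first:

       masked = c & (0x1f << 3);
       result = masked >> 3;

   where 0x1f is (1 << 5) - 1.  */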
1884 static gimple *
1885 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
1886 tree *type_out)
1888 gassign *first_stmt = dyn_cast <gassign *> (stmt_info->stmt);
1890 if (!first_stmt)
1891 return NULL;
1893 gassign *bf_stmt;
1894 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt))
1895 && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME)
1897 gimple *second_stmt
1898 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt));
1899 bf_stmt = dyn_cast <gassign *> (second_stmt);
1900 if (!bf_stmt
1901 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
1902 return NULL;
1904 else
1905 return NULL;
1907 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
1908 tree container = TREE_OPERAND (bf_ref, 0);
1910 if (!bit_field_offset (bf_ref).is_constant ()
1911 || !bit_field_size (bf_ref).is_constant ()
1912 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
1913 return NULL;
1915 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
1916 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
1917 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
1918 return NULL;
1920 gimple *use_stmt, *pattern_stmt;
1921 use_operand_p use_p;
1922 tree ret = gimple_assign_lhs (first_stmt);
1923 tree ret_type = TREE_TYPE (ret);
1924 bool shift_first = true;
1925 tree container_type = TREE_TYPE (container);
1926 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
1928 /* Calculate shift_n before the adjustments for widening loads, otherwise
1929 the container may change and we have to consider offset change for
1930 widening loads on big endianness. The shift_n calculated here can be
1931 independent of widening. */
1932 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
1933 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
1934 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
1935 if (BYTES_BIG_ENDIAN)
1936 shift_n = prec - shift_n - mask_width;
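/* E.g. for a 5-bit field at bit offset 3 in a 32-bit container, shift_n is
   3 on little-endian targets and 32 - 3 - 5 = 24 on big-endian targets.  */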
1938 /* We move the conversion earlier if the loaded type is smaller than the
1939 return type to enable the use of widening loads. */
1940 if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
1941 && !useless_type_conversion_p (TREE_TYPE (container), ret_type))
1943 pattern_stmt
1944 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
1945 NOP_EXPR, container);
1946 container = gimple_get_lhs (pattern_stmt);
1947 container_type = TREE_TYPE (container);
1948 prec = tree_to_uhwi (TYPE_SIZE (container_type));
1949 vectype = get_vectype_for_scalar_type (vinfo, container_type);
1950 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
1952 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
1953 /* If we are doing the conversion last then also delay the shift as we may
1954 be able to combine the shift and conversion in certain cases. */
1955 shift_first = false;
1957 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
1958 PLUS_EXPR then do the shift last as some targets can combine the shift and
1959 add into a single instruction. */
1960 if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt))
1962 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
1963 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
1964 shift_first = false;
1967 /* If we don't have to shift we only generate the mask, so just fix the
1968 code-path to shift_first. */
1969 if (shift_n == 0)
1970 shift_first = true;
1972 tree result;
1973 if (shift_first)
1975 tree shifted = container;
1976 if (shift_n)
1978 pattern_stmt
1979 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
1980 RSHIFT_EXPR, container,
1981 build_int_cst (sizetype, shift_n));
1982 shifted = gimple_assign_lhs (pattern_stmt);
1983 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
1986 tree mask = wide_int_to_tree (container_type,
1987 wi::mask (mask_width, false, prec));
1989 pattern_stmt
1990 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
1991 BIT_AND_EXPR, shifted, mask);
1992 result = gimple_assign_lhs (pattern_stmt);
1994 else
1996 tree mask = wide_int_to_tree (container_type,
1997 wi::shifted_mask (shift_n, mask_width,
1998 false, prec));
1999 pattern_stmt
2000 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2001 BIT_AND_EXPR, container, mask);
2002 tree masked = gimple_assign_lhs (pattern_stmt);
2004 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2005 pattern_stmt
2006 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2007 RSHIFT_EXPR, masked,
2008 build_int_cst (sizetype, shift_n));
2009 result = gimple_assign_lhs (pattern_stmt);
2012 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2014 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2015 pattern_stmt
2016 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2017 NOP_EXPR, result);
2020 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2021 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2023 return pattern_stmt;
2026 /* Function vect_recog_bit_insert_pattern
2028 Try to find the following pattern:
2030 written = BIT_INSERT_EXPR (container, value, bitpos);
2032 Input:
2034 * STMT_VINFO: The stmt we want to replace.
2036 Output:
2038 * TYPE_OUT: The vector type of the output of this pattern.
2040 * Return value: A new stmt that will be used to replace the sequence of
2041 stmts that constitute the pattern. In this case it will be:
2042 value = (container_type) value; // Make sure value has the container type.
2043 shifted = value << bitpos; // Shift value into place
2044 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2045 // the 'to-write value'.
2046 cleared = container & ~(mask << bitpos); // Clear the bits in the
2047 // container that we are going
2048 // to write to.
2049 written = cleared | masked; // Write bits.
2052 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask covering the bits
2053 that correspond to the real size of the bitfield value we are writing.
2054 The shift is only needed when bitpos != 0.
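/* For instance, inserting a 4-bit value v at bit position 8 of a 32-bit
   container c (little-endian bit numbering) expands to roughly:

       value   = (uint32_t) v;
       shifted = value << 8;
       masked  = shifted & (0xf << 8);
       cleared = c & ~(0xf << 8);
       written = cleared | masked;

   where 0xf is (1 << 4) - 1.  */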
2058 static gimple *
2059 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2060 tree *type_out)
2062 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2063 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2064 return NULL;
2066 tree container = gimple_assign_rhs1 (bf_stmt);
2067 tree value = gimple_assign_rhs2 (bf_stmt);
2068 tree shift = gimple_assign_rhs3 (bf_stmt);
2070 tree bf_type = TREE_TYPE (value);
2071 tree container_type = TREE_TYPE (container);
2073 if (!INTEGRAL_TYPE_P (container_type)
2074 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2075 return NULL;
2077 gimple *pattern_stmt;
2079 vect_unpromoted_value unprom;
2080 unprom.set_op (value, vect_internal_def);
2081 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2082 get_vectype_for_scalar_type (vinfo,
2083 container_type));
2085 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2086 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2087 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2088 if (BYTES_BIG_ENDIAN)
2090 shift_n = prec - shift_n - mask_width;
2091 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2094 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2096 pattern_stmt =
2097 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2098 NOP_EXPR, value);
2099 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2100 value = gimple_get_lhs (pattern_stmt);
2103 /* Shift VALUE into place. */
2104 tree shifted = value;
2105 if (shift_n)
2107 gimple_seq stmts = NULL;
2108 shifted
2109 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2110 if (!gimple_seq_empty_p (stmts))
2111 append_pattern_def_seq (vinfo, stmt_info,
2112 gimple_seq_first_stmt (stmts));
2115 tree mask_t
2116 = wide_int_to_tree (container_type,
2117 wi::shifted_mask (shift_n, mask_width, false, prec));
2119 /* Clear bits we don't want to write back from SHIFTED. */
2120 gimple_seq stmts = NULL;
2121 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2122 mask_t);
2123 if (!gimple_seq_empty_p (stmts))
2125 pattern_stmt = gimple_seq_first_stmt (stmts);
2126 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2129 /* Mask off the bits in the container that we are to write to. */
2130 mask_t = wide_int_to_tree (container_type,
2131 wi::shifted_mask (shift_n, mask_width, true, prec));
2132 tree cleared = vect_recog_temp_ssa_var (container_type);
2133 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2134 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2136 /* Write MASKED into CLEARED. */
2137 pattern_stmt
2138 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2139 BIT_IOR_EXPR, cleared, masked);
2141 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2142 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2144 return pattern_stmt;
2148 /* Recognize cases in which an operation is performed in one type WTYPE
2149 but could be done more efficiently in a narrower type NTYPE. For example,
2150 if we have:
2152 ATYPE a; // narrower than NTYPE
2153 BTYPE b; // narrower than NTYPE
2154 WTYPE aw = (WTYPE) a;
2155 WTYPE bw = (WTYPE) b;
2156 WTYPE res = aw + bw; // only uses of aw and bw
2158 then it would be more efficient to do:
2160 NTYPE an = (NTYPE) a;
2161 NTYPE bn = (NTYPE) b;
2162 NTYPE resn = an + bn;
2163 WTYPE res = (WTYPE) resn;
2165 Other situations include things like:
2167 ATYPE a; // NTYPE or narrower
2168 WTYPE aw = (WTYPE) a;
2169 WTYPE res = aw + b;
2171 when only "(NTYPE) res" is significant. In that case it's more efficient
2172 to truncate "b" and do the operation on NTYPE instead:
2174 NTYPE an = (NTYPE) a;
2175 NTYPE bn = (NTYPE) b; // truncation
2176 NTYPE resn = an + bn;
2177 WTYPE res = (WTYPE) resn;
2179 All users of "res" should then use "resn" instead, making the final
2180 statement dead (not marked as relevant). The final statement is still
2181 needed to maintain the type correctness of the IR.
2183 vect_determine_precisions has already determined the minimum
2184 precision of the operation and the minimum precision required
2185 by users of the result. */
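/* For instance, with ATYPE = BTYPE = uint8_t and WTYPE = int:

       int res = (int) a[i] + (int) b[i];
       out[i] = (unsigned char) res;

   only the low bits of res are needed, so the addition can be performed in
   a narrower NTYPE and the extension moved to the (dead) final conversion,
   reducing the number of vector statements per scalar statement.  */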
2187 static gimple *
2188 vect_recog_over_widening_pattern (vec_info *vinfo,
2189 stmt_vec_info last_stmt_info, tree *type_out)
2191 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2192 if (!last_stmt)
2193 return NULL;
2195 /* See whether we have found that this operation can be done on a
2196 narrower type without changing its semantics. */
2197 unsigned int new_precision = last_stmt_info->operation_precision;
2198 if (!new_precision)
2199 return NULL;
2201 tree lhs = gimple_assign_lhs (last_stmt);
2202 tree type = TREE_TYPE (lhs);
2203 tree_code code = gimple_assign_rhs_code (last_stmt);
2205 /* Punt for reductions where we don't handle the type conversions. */
2206 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2207 return NULL;
2209 /* Keep the first operand of a COND_EXPR as-is: only the other two
2210 operands are interesting. */
2211 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2213 /* Check the operands. */
2214 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
2215 auto_vec <vect_unpromoted_value, 3> unprom (nops);
2216 unprom.quick_grow (nops);
2217 unsigned int min_precision = 0;
2218 bool single_use_p = false;
2219 for (unsigned int i = 0; i < nops; ++i)
2221 tree op = gimple_op (last_stmt, first_op + i);
2222 if (TREE_CODE (op) == INTEGER_CST)
2223 unprom[i].set_op (op, vect_constant_def);
2224 else if (TREE_CODE (op) == SSA_NAME)
2226 bool op_single_use_p = true;
2227 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
2228 &op_single_use_p))
2229 return NULL;
2230 /* If:
2232 (1) N bits of the result are needed;
2233 (2) all inputs are widened from M<N bits; and
2234 (3) one operand OP is a single-use SSA name
2236 we can shift the M->N widening from OP to the output
2237 without changing the number or type of extensions involved.
2238 This then reduces the number of copies of STMT_INFO.
2240 If instead of (3) more than one operand is a single-use SSA name,
2241 shifting the extension to the output is even more of a win.
2243 If instead:
2245 (1) N bits of the result are needed;
2246 (2) one operand OP2 is widened from M2<N bits;
2247 (3) another operand OP1 is widened from M1<M2 bits; and
2248 (4) both OP1 and OP2 are single-use
2250 the choice is between:
2252 (a) truncating OP2 to M1, doing the operation on M1,
2253 and then widening the result to N
2255 (b) widening OP1 to M2, doing the operation on M2, and then
2256 widening the result to N
2258 Both shift the M2->N widening of the inputs to the output.
2259 (a) additionally shifts the M1->M2 widening to the output;
2260 it requires fewer copies of STMT_INFO but requires an extra
2261 M2->M1 truncation.
2263 Which is better will depend on the complexity and cost of
2264 STMT_INFO, which is hard to predict at this stage. However,
2265 a clear tie-breaker in favor of (b) is the fact that the
2266 truncation in (a) increases the length of the operation chain.
2268 If instead of (4) only one of OP1 or OP2 is single-use,
2269 (b) is still a win over doing the operation in N bits:
2270 it still shifts the M2->N widening on the single-use operand
2271 to the output and reduces the number of STMT_INFO copies.
2273 If neither operand is single-use then operating on fewer than
2274 N bits might lead to more extensions overall. Whether it does
2275 or not depends on global information about the vectorization
2276 region, and whether that's a good trade-off would again
2277 depend on the complexity and cost of the statements involved,
2278 as well as things like register pressure that are not normally
2279 modelled at this stage. We therefore ignore these cases
2280 and just optimize the clear single-use wins above.
2282 Thus we take the maximum precision of the unpromoted operands
2283 and record whether any operand is single-use. */
2284 if (unprom[i].dt == vect_internal_def)
2286 min_precision = MAX (min_precision,
2287 TYPE_PRECISION (unprom[i].type));
2288 single_use_p |= op_single_use_p;
2291 else
2292 return NULL;
2295 /* Although the operation could be done in operation_precision, we have
2296 to balance that against introducing extra truncations or extensions.
2297 Calculate the minimum precision that can be handled efficiently.
2299 The loop above determined that the operation could be handled
2300 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
2301 extension from the inputs to the output without introducing more
2302 instructions, and would reduce the number of instructions required
2303 for STMT_INFO itself.
2305 vect_determine_precisions has also determined that the result only
2306 needs min_output_precision bits. Truncating by a factor of N times
2307 requires a tree of N - 1 instructions, so if TYPE is N times wider
2308 than min_output_precision, doing the operation in TYPE and truncating
2309 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
2310 In contrast:
2312 - truncating the input to a unary operation and doing the operation
2313 in the new type requires at most N - 1 + 1 = N instructions per
2314 output vector
2316 - doing the same for a binary operation requires at most
2317 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
2319 Both unary and binary operations require fewer instructions than
2320 this if the operands were extended from a suitable truncated form.
2321 Thus there is usually nothing to lose by doing operations in
2322 min_output_precision bits, but there can be something to gain. */
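/* For example, if TYPE has 32 bits and min_output_precision is 8, then
   N = 4: doing the operation in TYPE and truncating costs 4 + 3 = 7 vector
   statements per output vector, a truncated unary operation costs
   3 + 1 = 4, and a truncated binary operation costs 3 * 2 + 1 = 7.  */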
2323 if (!single_use_p)
2324 min_precision = last_stmt_info->min_output_precision;
2325 else
2326 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
2328 /* Apply the minimum efficient precision we just calculated. */
2329 if (new_precision < min_precision)
2330 new_precision = min_precision;
2331 new_precision = vect_element_precision (new_precision);
2332 if (new_precision >= TYPE_PRECISION (type))
2333 return NULL;
2335 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
2337 *type_out = get_vectype_for_scalar_type (vinfo, type);
2338 if (!*type_out)
2339 return NULL;
2341 /* We've found a viable pattern. Get the new type of the operation. */
2342 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
2343 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2345 /* If we're truncating an operation, we need to make sure that we
2346 don't introduce new undefined overflow. The codes tested here are
2347 a subset of those accepted by vect_truncatable_operation_p. */
2348 tree op_type = new_type;
2349 if (TYPE_OVERFLOW_UNDEFINED (new_type)
2350 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2351 op_type = build_nonstandard_integer_type (new_precision, true);
2353 /* We specifically don't check here whether the target supports the
2354 new operation, since it might be something that a later pattern
2355 wants to rewrite anyway. If targets have a minimum element size
2356 for some optabs, we should pattern-match smaller ops to larger ops
2357 where beneficial. */
2358 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2359 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2360 if (!new_vectype || !op_vectype)
2361 return NULL;
2363 if (dump_enabled_p ())
2364 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2365 type, new_type);
2367 /* Calculate the rhs operands for an operation on OP_TYPE. */
2368 tree ops[3] = {};
2369 for (unsigned int i = 1; i < first_op; ++i)
2370 ops[i - 1] = gimple_op (last_stmt, i);
2371 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2372 op_type, &unprom[0], op_vectype);
2374 /* Use the operation to produce a result of type OP_TYPE. */
2375 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2376 gimple *pattern_stmt = gimple_build_assign (new_var, code,
2377 ops[0], ops[1], ops[2]);
2378 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_NOTE, vect_location,
2382 "created pattern stmt: %G", pattern_stmt);
2384 /* Convert back to the original signedness, if OP_TYPE is different
2385 from NEW_TYPE. */
2386 if (op_type != new_type)
2387 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2388 pattern_stmt, op_vectype);
2390 /* Promote the result to the original type. */
2391 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2392 pattern_stmt, new_vectype);
2394 return pattern_stmt;
2397 /* Recognize the following patterns:
2399 ATYPE a; // narrower than TYPE
2400 BTYPE b; // narrower than TYPE
2402 1) Multiply high with scaling
2403 TYPE res = ((TYPE) a * (TYPE) b) >> c;
2404 Here, c is bitsize (TYPE) / 2 - 1.
2406 2) ... or also with rounding
2407 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
2408 Here, d is bitsize (TYPE) / 2 - 2.
2410 3) Normal multiply high
2411 TYPE res = ((TYPE) a * (TYPE) b) >> e;
2412 Here, e is bitsize (TYPE) / 2.
2414 where only the bottom half of res is used. */
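/* For instance, with ATYPE = BTYPE = int16_t, TYPE = int32_t and hence
   bitsize (TYPE) = 32, the three shift amounts are 15, 14 and 16:

       int32_t prod = (int32_t) a[i] * (int32_t) b[i];
       r1[i] = (int16_t) (prod >> 15);                 // 1) -> IFN_MULHS
       r2[i] = (int16_t) (((prod >> 14) + 1) >> 1);    // 2) -> IFN_MULHRS
       r3[i] = (int16_t) (prod >> 16);                 // 3) -> IFN_MULH

   assuming the target provides the corresponding internal functions for
   the 16-bit element type.  */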
2416 static gimple *
2417 vect_recog_mulhs_pattern (vec_info *vinfo,
2418 stmt_vec_info last_stmt_info, tree *type_out)
2420 /* Check for a right shift. */
2421 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2422 if (!last_stmt
2423 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2424 return NULL;
2426 /* Check that the shift result is wider than the users of the
2427 result need (i.e. that narrowing would be a natural choice). */
2428 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2429 unsigned int target_precision
2430 = vect_element_precision (last_stmt_info->min_output_precision);
2431 if (!INTEGRAL_TYPE_P (lhs_type)
2432 || target_precision >= TYPE_PRECISION (lhs_type))
2433 return NULL;
2435 /* Look through any change in sign on the outer shift input. */
2436 vect_unpromoted_value unprom_rshift_input;
2437 tree rshift_input = vect_look_through_possible_promotion
2438 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2439 if (!rshift_input
2440 || TYPE_PRECISION (TREE_TYPE (rshift_input))
2441 != TYPE_PRECISION (lhs_type))
2442 return NULL;
2444 /* Get the definition of the shift input. */
2445 stmt_vec_info rshift_input_stmt_info
2446 = vect_get_internal_def (vinfo, rshift_input);
2447 if (!rshift_input_stmt_info)
2448 return NULL;
2449 gassign *rshift_input_stmt
2450 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2451 if (!rshift_input_stmt)
2452 return NULL;
2454 stmt_vec_info mulh_stmt_info;
2455 tree scale_term;
2456 bool rounding_p = false;
2458 /* Check for the presence of the rounding term. */
2459 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2461 /* Check that the outer shift was by 1. */
2462 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2463 return NULL;
2465 /* Check that the second operand of the PLUS_EXPR is 1. */
2466 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2467 return NULL;
2469 /* Look through any change in sign on the addition input. */
2470 vect_unpromoted_value unprom_plus_input;
2471 tree plus_input = vect_look_through_possible_promotion
2472 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2473 if (!plus_input
2474 || TYPE_PRECISION (TREE_TYPE (plus_input))
2475 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2476 return NULL;
2478 /* Get the definition of the multiply-high-scale part. */
2479 stmt_vec_info plus_input_stmt_info
2480 = vect_get_internal_def (vinfo, plus_input);
2481 if (!plus_input_stmt_info)
2482 return NULL;
2483 gassign *plus_input_stmt
2484 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2485 if (!plus_input_stmt
2486 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2487 return NULL;
2489 /* Look through any change in sign on the scaling input. */
2490 vect_unpromoted_value unprom_scale_input;
2491 tree scale_input = vect_look_through_possible_promotion
2492 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2493 if (!scale_input
2494 || TYPE_PRECISION (TREE_TYPE (scale_input))
2495 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2496 return NULL;
2498 /* Get the definition of the multiply-high part. */
2499 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2500 if (!mulh_stmt_info)
2501 return NULL;
2503 /* Get the scaling term. */
2504 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2505 rounding_p = true;
2507 else
2509 mulh_stmt_info = rshift_input_stmt_info;
2510 scale_term = gimple_assign_rhs2 (last_stmt);
2513 /* Check that the scaling factor is constant. */
2514 if (TREE_CODE (scale_term) != INTEGER_CST)
2515 return NULL;
2517 /* Check whether the scaling input term can be seen as two widened
2518 inputs multiplied together. */
2519 vect_unpromoted_value unprom_mult[2];
2520 tree new_type;
2521 unsigned int nops
2522 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2523 false, 2, unprom_mult, &new_type);
2524 if (nops != 2)
2525 return NULL;
2527 /* Adjust output precision. */
2528 if (TYPE_PRECISION (new_type) < target_precision)
2529 new_type = build_nonstandard_integer_type
2530 (target_precision, TYPE_UNSIGNED (new_type));
2532 unsigned mult_precision = TYPE_PRECISION (new_type);
2533 internal_fn ifn;
2534 /* Check that the scaling factor is expected. Instead of
2535 target_precision, we should use the one that we actually
2536 use for the internal function. */
2537 if (rounding_p)
2539 /* Check pattern 2). */
2540 if (wi::to_widest (scale_term) + mult_precision + 2
2541 != TYPE_PRECISION (lhs_type))
2542 return NULL;
2544 ifn = IFN_MULHRS;
2546 else
2548 /* Check for pattern 1). */
2549 if (wi::to_widest (scale_term) + mult_precision + 1
2550 == TYPE_PRECISION (lhs_type))
2551 ifn = IFN_MULHS;
2552 /* Check for pattern 3). */
2553 else if (wi::to_widest (scale_term) + mult_precision
2554 == TYPE_PRECISION (lhs_type))
2555 ifn = IFN_MULH;
2556 else
2557 return NULL;
2560 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2562 /* Check for target support. */
2563 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2564 if (!new_vectype
2565 || !direct_internal_fn_supported_p
2566 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2567 return NULL;
2569 /* The IR requires a valid vector type for the cast result, even though
2570 it's likely to be discarded. */
2571 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2572 if (!*type_out)
2573 return NULL;
2575 /* Generate the IFN_MULHRS call. */
2576 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2577 tree new_ops[2];
2578 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2579 unprom_mult, new_vectype);
2580 gcall *mulhrs_stmt
2581 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2582 gimple_call_set_lhs (mulhrs_stmt, new_var);
2583 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2585 if (dump_enabled_p ())
2586 dump_printf_loc (MSG_NOTE, vect_location,
2587 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
2589 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2590 mulhrs_stmt, new_vectype);
2593 /* Recognize the patterns:
2595 ATYPE a; // narrower than TYPE
2596 BTYPE b; // narrower than TYPE
2597 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2598 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2600 where only the bottom half of avg is used. Try to transform them into:
2602 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2603 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2605 followed by:
2607 TYPE avg = (TYPE) avg';
2609 where NTYPE is no wider than half of TYPE. Since only the bottom half
2610 of avg is used, all or part of the cast of avg' should become redundant.
2612 If there is no target support available, generate code to distribute rshift
2613 over plus and add a carry. */
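/* For instance, with a and b of type uint8_t:

       out[i] = (uint8_t) (((int) a[i] + (int) b[i] + 1) >> 1);

   becomes a single .AVG_CEIL on 8-bit elements when the target supports
   it, and otherwise uses the shift-and-carry fallback described above.  */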
2615 static gimple *
2616 vect_recog_average_pattern (vec_info *vinfo,
2617 stmt_vec_info last_stmt_info, tree *type_out)
2619 /* Check for a shift right by one bit. */
2620 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2621 if (!last_stmt
2622 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2623 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2624 return NULL;
2626 /* Check that the shift result is wider than the users of the
2627 result need (i.e. that narrowing would be a natural choice). */
2628 tree lhs = gimple_assign_lhs (last_stmt);
2629 tree type = TREE_TYPE (lhs);
2630 unsigned int target_precision
2631 = vect_element_precision (last_stmt_info->min_output_precision);
2632 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2633 return NULL;
2635 /* Look through any change in sign on the shift input. */
2636 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2637 vect_unpromoted_value unprom_plus;
2638 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2639 &unprom_plus);
2640 if (!rshift_rhs
2641 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2642 return NULL;
2644 /* Get the definition of the shift input. */
2645 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2646 if (!plus_stmt_info)
2647 return NULL;
2649 /* Check whether the shift input can be seen as a tree of additions on
2650 2 or 3 widened inputs.
2652 Note that the pattern should be a win even if the result of one or
2653 more additions is reused elsewhere: if the pattern matches, we'd be
2654 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2655 internal_fn ifn = IFN_AVG_FLOOR;
2656 vect_unpromoted_value unprom[3];
2657 tree new_type;
2658 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2659 WIDEN_PLUS_EXPR, false, 3,
2660 unprom, &new_type);
2661 if (nops == 0)
2662 return NULL;
2663 if (nops == 3)
2665 /* Check that one operand is 1. */
2666 unsigned int i;
2667 for (i = 0; i < 3; ++i)
2668 if (integer_onep (unprom[i].op))
2669 break;
2670 if (i == 3)
2671 return NULL;
2672 /* Throw away the 1 operand and keep the other two. */
2673 if (i < 2)
2674 unprom[i] = unprom[2];
2675 ifn = IFN_AVG_CEIL;
2678 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2680 /* We know that:
2682 (a) the operation can be viewed as:
2684 TYPE widened0 = (TYPE) UNPROM[0];
2685 TYPE widened1 = (TYPE) UNPROM[1];
2686 TYPE tmp1 = widened0 + widened1 {+ 1};
2687 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2689 (b) the first two statements are equivalent to:
2691 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2692 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2694 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2695 where sensible;
2697 (d) all the operations can be performed correctly at twice the width of
2698 NEW_TYPE, due to the nature of the average operation; and
2700 (e) users of the result of the right shift need only TARGET_PRECISION
2701 bits, where TARGET_PRECISION is no more than half of TYPE's
2702 precision.
2704 Under these circumstances, the only situation in which NEW_TYPE
2705 could be narrower than TARGET_PRECISION is if widened0, widened1
2706 and an addition result are all used more than once. Thus we can
2707 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2708 as "free", whereas widening the result of the average instruction
2709 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2710 therefore better not to go narrower than TARGET_PRECISION. */
2711 if (TYPE_PRECISION (new_type) < target_precision)
2712 new_type = build_nonstandard_integer_type (target_precision,
2713 TYPE_UNSIGNED (new_type));
2715 /* Check for target support. */
2716 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2717 if (!new_vectype)
2718 return NULL;
2720 bool fallback_p = false;
2722 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2724 else if (TYPE_UNSIGNED (new_type)
2725 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2726 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2727 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2728 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2729 fallback_p = true;
2730 else
2731 return NULL;
2733 /* The IR requires a valid vector type for the cast result, even though
2734 it's likely to be discarded. */
2735 *type_out = get_vectype_for_scalar_type (vinfo, type);
2736 if (!*type_out)
2737 return NULL;
2739 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2740 tree new_ops[2];
2741 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2742 unprom, new_vectype);
2744 if (fallback_p)
2746 /* As a fallback, generate code for the following sequence:
2748 shifted_op0 = new_ops[0] >> 1;
2749 shifted_op1 = new_ops[1] >> 1;
2750 sum_of_shifted = shifted_op0 + shifted_op1;
2751 unmasked_carry = new_ops[0] and/or new_ops[1];
2752 carry = unmasked_carry & 1;
2753 new_var = sum_of_shifted + carry;
2756 tree one_cst = build_one_cst (new_type);
2757 gassign *g;
2759 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2760 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2761 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2763 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2764 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2765 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2767 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2768 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2769 shifted_op0, shifted_op1);
2770 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2772 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2773 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2774 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2775 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2777 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2778 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2779 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2781 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2782 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2785 /* Generate the IFN_AVG* call. */
2786 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2787 new_ops[1]);
2788 gimple_call_set_lhs (average_stmt, new_var);
2789 gimple_set_location (average_stmt, gimple_location (last_stmt));
2791 if (dump_enabled_p ())
2792 dump_printf_loc (MSG_NOTE, vect_location,
2793 "created pattern stmt: %G", (gimple *) average_stmt);
2795 return vect_convert_output (vinfo, last_stmt_info,
2796 type, average_stmt, new_vectype);
2799 /* Recognize cases in which the input to a cast is wider than its
2800 output, and the input is fed by a widening operation. Fold this
2801 by removing the unnecessary intermediate widening. E.g.:
2803 unsigned char a;
2804 unsigned int b = (unsigned int) a;
2805 unsigned short c = (unsigned short) b;
2807 -->
2809 unsigned short c = (unsigned short) a;
2811 Although this is rare in input IR, it is an expected side-effect
2812 of the over-widening pattern above.
2814 This is beneficial also for integer-to-float conversions, if the
2815 widened integer has more bits than the float, and if the unwidened
2816 input doesn't. */
2818 static gimple *
2819 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2820 stmt_vec_info last_stmt_info, tree *type_out)
2822 /* Check for a cast, including an integer-to-float conversion. */
2823 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2824 if (!last_stmt)
2825 return NULL;
2826 tree_code code = gimple_assign_rhs_code (last_stmt);
2827 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2828 return NULL;
2830 /* Make sure that the rhs is a scalar with a natural bitsize. */
2831 tree lhs = gimple_assign_lhs (last_stmt);
2832 if (!lhs)
2833 return NULL;
2834 tree lhs_type = TREE_TYPE (lhs);
2835 scalar_mode lhs_mode;
2836 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2837 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2838 return NULL;
2840 /* Check for a narrowing operation (from a vector point of view). */
2841 tree rhs = gimple_assign_rhs1 (last_stmt);
2842 tree rhs_type = TREE_TYPE (rhs);
2843 if (!INTEGRAL_TYPE_P (rhs_type)
2844 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2845 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2846 return NULL;
2848 /* Try to find an unpromoted input. */
2849 vect_unpromoted_value unprom;
2850 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2851 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2852 return NULL;
2854 /* If the bits above RHS_TYPE matter, make sure that they're the
2855 same when extending from UNPROM as they are when extending from RHS. */
2856 if (!INTEGRAL_TYPE_P (lhs_type)
2857 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2858 return NULL;
2860 /* We can get the same result by casting UNPROM directly, to avoid
2861 the unnecessary widening and narrowing. */
2862 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2864 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2865 if (!*type_out)
2866 return NULL;
2868 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2869 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2870 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2872 return pattern_stmt;
2875 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2876 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2878 static gimple *
2879 vect_recog_widen_shift_pattern (vec_info *vinfo,
2880 stmt_vec_info last_stmt_info, tree *type_out)
2882 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2883 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2884 "vect_recog_widen_shift_pattern");
2887 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2889 type a_t, b_t, c_t;
2891 S0 a_t = b_t r<< c_t;
2893 Input/Output:
2895 * STMT_VINFO: The stmt from which the pattern search begins,
2896 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2897 with a sequence:
2899 S1 d_t = -c_t;
2900 S2 e_t = d_t & (B - 1);
2901 S3 f_t = b_t << c_t;
2902 S4 g_t = b_t >> e_t;
2903 S0 a_t = f_t | g_t;
2905 where B is the element bitsize of type.
2907 Output:
2909 * TYPE_OUT: The type of the output of this pattern.
2911 * Return value: A new stmt that will be used to replace the rotate
2912 S0 stmt. */
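/* For instance, for a 32-bit unsigned rotate a = b r<< c the emitted
   sequence is effectively:

       d = -c;
       e = d & 31;
       f = b << c;
       g = b >> e;
       a = f | g;

   with B = 32, so the right-shift amount e is reduced modulo the element
   width.  */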
2914 static gimple *
2915 vect_recog_rotate_pattern (vec_info *vinfo,
2916 stmt_vec_info stmt_vinfo, tree *type_out)
2918 gimple *last_stmt = stmt_vinfo->stmt;
2919 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2920 gimple *pattern_stmt, *def_stmt;
2921 enum tree_code rhs_code;
2922 enum vect_def_type dt;
2923 optab optab1, optab2;
2924 edge ext_def = NULL;
2925 bool bswap16_p = false;
2927 if (is_gimple_assign (last_stmt))
2929 rhs_code = gimple_assign_rhs_code (last_stmt);
2930 switch (rhs_code)
2932 case LROTATE_EXPR:
2933 case RROTATE_EXPR:
2934 break;
2935 default:
2936 return NULL;
2939 lhs = gimple_assign_lhs (last_stmt);
2940 oprnd0 = gimple_assign_rhs1 (last_stmt);
2941 type = TREE_TYPE (oprnd0);
2942 oprnd1 = gimple_assign_rhs2 (last_stmt);
2944 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2946 /* __builtin_bswap16 (x) is another form of x r>> 8.
2947 The vectorizer has bswap support, but only if the argument isn't
2948 promoted. */
2949 lhs = gimple_call_lhs (last_stmt);
2950 oprnd0 = gimple_call_arg (last_stmt, 0);
2951 type = TREE_TYPE (oprnd0);
2952 if (!lhs
2953 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2954 || TYPE_PRECISION (type) <= 16
2955 || TREE_CODE (oprnd0) != SSA_NAME
2956 || BITS_PER_UNIT != 8)
2957 return NULL;
2959 stmt_vec_info def_stmt_info;
2960 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2961 return NULL;
2963 if (dt != vect_internal_def)
2964 return NULL;
2966 if (gimple_assign_cast_p (def_stmt))
2968 def = gimple_assign_rhs1 (def_stmt);
2969 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2970 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2971 oprnd0 = def;
2974 type = TREE_TYPE (lhs);
2975 vectype = get_vectype_for_scalar_type (vinfo, type);
2976 if (vectype == NULL_TREE)
2977 return NULL;
2979 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2981 /* The encoding uses one stepped pattern for each byte in the
2982 16-bit word. */
2983 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2984 for (unsigned i = 0; i < 3; ++i)
2985 for (unsigned j = 0; j < 2; ++j)
2986 elts.quick_push ((i + 1) * 2 - j - 1);
2988 vec_perm_indices indices (elts, 1,
2989 TYPE_VECTOR_SUBPARTS (char_vectype));
2990 machine_mode vmode = TYPE_MODE (char_vectype);
2991 if (can_vec_perm_const_p (vmode, vmode, indices))
2993 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2994 undo the argument promotion. */
2995 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2997 def = vect_recog_temp_ssa_var (type, NULL);
2998 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2999 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3000 oprnd0 = def;
3003 /* Pattern detected. */
3004 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3006 *type_out = vectype;
3008 /* Pattern supported. Create a stmt to be used to replace the
3009 pattern, with the unpromoted argument. */
3010 var = vect_recog_temp_ssa_var (type, NULL);
3011 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3012 1, oprnd0);
3013 gimple_call_set_lhs (pattern_stmt, var);
3014 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3015 gimple_call_fntype (last_stmt));
3016 return pattern_stmt;
3020 oprnd1 = build_int_cst (integer_type_node, 8);
3021 rhs_code = LROTATE_EXPR;
3022 bswap16_p = true;
3024 else
3025 return NULL;
3027 if (TREE_CODE (oprnd0) != SSA_NAME
3028 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
3029 || !INTEGRAL_TYPE_P (type))
3030 return NULL;
3032 stmt_vec_info def_stmt_info;
3033 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3034 return NULL;
3036 if (dt != vect_internal_def
3037 && dt != vect_constant_def
3038 && dt != vect_external_def)
3039 return NULL;
3041 vectype = get_vectype_for_scalar_type (vinfo, type);
3042 if (vectype == NULL_TREE)
3043 return NULL;
3045 /* If vector/vector or vector/scalar rotate is supported by the target,
3046 don't do anything here. */
3047 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3048 if (optab1
3049 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3051 use_rotate:
3052 if (bswap16_p)
3054 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3056 def = vect_recog_temp_ssa_var (type, NULL);
3057 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3058 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3059 oprnd0 = def;
3062 /* Pattern detected. */
3063 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3065 *type_out = vectype;
3067 /* Pattern supported. Create a stmt to be used to replace the
3068 pattern. */
3069 var = vect_recog_temp_ssa_var (type, NULL);
3070 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3071 oprnd1);
3072 return pattern_stmt;
3074 return NULL;
3077 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3079 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3080 if (optab2
3081 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3082 goto use_rotate;
3085 tree utype = unsigned_type_for (type);
3086 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3087 if (!uvectype)
3088 return NULL;
3090 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3091 don't do anything here either. */
3092 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3093 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3094 if (!optab1
3095 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3096 || !optab2
3097 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3099 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3100 return NULL;
3101 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3102 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3103 if (!optab1
3104 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3105 || !optab2
3106 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3107 return NULL;
3110 *type_out = vectype;
3112 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3114 def = vect_recog_temp_ssa_var (utype, NULL);
3115 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3116 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3117 oprnd0 = def;
3120 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3121 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3123 def = NULL_TREE;
3124 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3125 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3126 def = oprnd1;
3127 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3129 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3130 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3131 && TYPE_PRECISION (TREE_TYPE (rhs1))
3132 == TYPE_PRECISION (type))
3133 def = rhs1;
3136 if (def == NULL_TREE)
3138 def = vect_recog_temp_ssa_var (utype, NULL);
3139 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3140 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3142 stype = TREE_TYPE (def);
3144 if (TREE_CODE (def) == INTEGER_CST)
3146 if (!tree_fits_uhwi_p (def)
3147 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3148 || integer_zerop (def))
3149 return NULL;
3150 def2 = build_int_cst (stype,
3151 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3153 else
3155 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3157 if (vecstype == NULL_TREE)
3158 return NULL;
3159 def2 = vect_recog_temp_ssa_var (stype, NULL);
3160 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3161 if (ext_def)
3163 basic_block new_bb
3164 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3165 gcc_assert (!new_bb);
3167 else
3168 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3170 def2 = vect_recog_temp_ssa_var (stype, NULL);
3171 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3172 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3173 gimple_assign_lhs (def_stmt), mask);
3174 if (ext_def)
3176 basic_block new_bb
3177 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3178 gcc_assert (!new_bb);
3180 else
3181 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3184 var1 = vect_recog_temp_ssa_var (utype, NULL);
3185 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3186 ? LSHIFT_EXPR : RSHIFT_EXPR,
3187 oprnd0, def);
3188 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3190 var2 = vect_recog_temp_ssa_var (utype, NULL);
3191 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3192 ? RSHIFT_EXPR : LSHIFT_EXPR,
3193 oprnd0, def2);
3194 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3196 /* Pattern detected. */
3197 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3199 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3200 var = vect_recog_temp_ssa_var (utype, NULL);
3201 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3203 if (!useless_type_conversion_p (type, utype))
3205 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
3206 tree result = vect_recog_temp_ssa_var (type, NULL);
3207 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
3209 return pattern_stmt;
3212 /* Detect a vector by vector shift pattern that wouldn't otherwise be
3213 vectorized:
3215 type a_t;
3216 TYPE b_T, res_T;
3218 S1 a_t = ;
3219 S2 b_T = ;
3220 S3 res_T = b_T op a_t;
3222 where type 'TYPE' is a type with different size than 'type',
3223 and op is <<, >> or rotate.
3225 Also detect cases:
3227 type a_t;
3228 TYPE b_T, c_T, res_T;
3230 S0 c_T = ;
3231 S1 a_t = (type) c_T;
3232 S2 b_T = ;
3233 S3 res_T = b_T op a_t;
3235 Input/Output:
3237 * STMT_VINFO: The stmt from which the pattern search begins,
3238 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
3239 with a shift/rotate which has the same type on both operands, in the
3240 second case just b_T op c_T, in the first case with an added cast
3241 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
3243 Output:
3245 * TYPE_OUT: The type of the output of this pattern.
3247 * Return value: A new stmt that will be used to replace the shift/rotate
3248 S3 stmt. */
3250 static gimple *
3251 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
3252 stmt_vec_info stmt_vinfo,
3253 tree *type_out)
3255 gimple *last_stmt = stmt_vinfo->stmt;
3256 tree oprnd0, oprnd1, lhs, var;
3257 gimple *pattern_stmt;
3258 enum tree_code rhs_code;
3260 if (!is_gimple_assign (last_stmt))
3261 return NULL;
3263 rhs_code = gimple_assign_rhs_code (last_stmt);
3264 switch (rhs_code)
3266 case LSHIFT_EXPR:
3267 case RSHIFT_EXPR:
3268 case LROTATE_EXPR:
3269 case RROTATE_EXPR:
3270 break;
3271 default:
3272 return NULL;
3275 lhs = gimple_assign_lhs (last_stmt);
3276 oprnd0 = gimple_assign_rhs1 (last_stmt);
3277 oprnd1 = gimple_assign_rhs2 (last_stmt);
3278 if (TREE_CODE (oprnd0) != SSA_NAME
3279 || TREE_CODE (oprnd1) != SSA_NAME
3280 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
3281 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
3282 || TYPE_PRECISION (TREE_TYPE (lhs))
3283 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
3284 return NULL;
3286 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
3287 if (!def_vinfo)
3288 return NULL;
3290 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
3291 if (*type_out == NULL_TREE)
3292 return NULL;
3294 tree def = NULL_TREE;
3295 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
3296 if (def_stmt && gimple_assign_cast_p (def_stmt))
3298 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3299 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
3300 && TYPE_PRECISION (TREE_TYPE (rhs1))
3301 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
3303 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
3304 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
3305 def = rhs1;
3306 else
3308 tree mask
3309 = build_low_bits_mask (TREE_TYPE (rhs1),
3310 TYPE_PRECISION (TREE_TYPE (oprnd1)));
3311 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3312 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
3313 tree vecstype = get_vectype_for_scalar_type (vinfo,
3314 TREE_TYPE (rhs1));
3315 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3320 if (def == NULL_TREE)
3322 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3323 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3324 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3327 /* Pattern detected. */
3328 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
3330 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3331 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3332 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
3334 return pattern_stmt;
3337 /* Return true iff the target has a vector optab implementing the operation
3338 CODE on type VECTYPE. */
3340 static bool
3341 target_has_vecop_for_code (tree_code code, tree vectype)
3343 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3344 return voptab
3345 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3348 /* Verify that the target has optabs of VECTYPE to perform all the steps
3349 needed by the multiplication-by-immediate synthesis algorithm described by
3350 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
3351 present. Return true iff the target supports all the steps. */
3353 static bool
3354 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3355 tree vectype, bool synth_shift_p)
3357 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3358 return false;
3360 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3361 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3363 if (var == negate_variant
3364 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3365 return false;
3367 /* If we must synthesize shifts with additions make sure that vector
3368 addition is available. */
3369 if ((var == add_variant || synth_shift_p) && !supports_vplus)
3370 return false;
3372 for (int i = 1; i < alg->ops; i++)
3374 switch (alg->op[i])
3376 case alg_shift:
3377 break;
3378 case alg_add_t_m2:
3379 case alg_add_t2_m:
3380 case alg_add_factor:
3381 if (!supports_vplus)
3382 return false;
3383 break;
3384 case alg_sub_t_m2:
3385 case alg_sub_t2_m:
3386 case alg_sub_factor:
3387 if (!supports_vminus)
3388 return false;
3389 break;
3390 case alg_unknown:
3391 case alg_m:
3392 case alg_zero:
3393 case alg_impossible:
3394 return false;
3395 default:
3396 gcc_unreachable ();
3400 return true;
3403 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3404 putting the final result in DEST. Append all statements but the last to
3405 STMT_INFO's pattern definition sequence. Return the last statement. */
3407 static gimple *
3408 synth_lshift_by_additions (vec_info *vinfo,
3409 tree dest, tree op, HOST_WIDE_INT amnt,
3410 stmt_vec_info stmt_info)
3412 HOST_WIDE_INT i;
3413 tree itype = TREE_TYPE (op);
3414 tree prev_res = op;
3415 gcc_assert (amnt >= 0);
3416 for (i = 0; i < amnt; i++)
3418 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3419 : dest;
3420 gimple *stmt
3421 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3422 prev_res = tmp_var;
3423 if (i < amnt - 1)
3424 append_pattern_def_seq (vinfo, stmt_info, stmt);
3425 else
3426 return stmt;
3428 gcc_unreachable ();
3429 return NULL;
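/* As an illustration of the synthesis above (the temporaries t1 and t2 are
   only names for this sketch): for AMNT == 3 the loop emits three
   self-additions, appending all but the last and returning the last:

     t1 = op + op;        (op << 1)
     t2 = t1 + t1;        (op << 2)
     dest = t2 + t2;      (op << 3)  */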
3432 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3433 CODE to operands OP1 and OP2, creating a new temporary SSA var in
3434 the process if necessary. Append the resulting assignment statements
3435 to the sequence in STMT_VINFO. Return the SSA variable that holds the
3436 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3437 left shifts using additions. */
3439 static tree
3440 apply_binop_and_append_stmt (vec_info *vinfo,
3441 tree_code code, tree op1, tree op2,
3442 stmt_vec_info stmt_vinfo, bool synth_shift_p)
3444 if (integer_zerop (op2)
3445 && (code == LSHIFT_EXPR
3446 || code == PLUS_EXPR))
3448 gcc_assert (TREE_CODE (op1) == SSA_NAME);
3449 return op1;
3452 gimple *stmt;
3453 tree itype = TREE_TYPE (op1);
3454 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3456 if (code == LSHIFT_EXPR
3457 && synth_shift_p)
3459 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3460 TREE_INT_CST_LOW (op2), stmt_vinfo);
3461 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3462 return tmp_var;
3465 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3466 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3467 return tmp_var;
3470 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3471 and simple arithmetic operations to be vectorized. Record the statements
3472 produced in STMT_VINFO and return the last statement in the sequence or
3473 NULL if it's not possible to synthesize such a multiplication.
3474 This function mirrors the behavior of expand_mult_const in expmed.cc but
3475 works on tree-ssa form. */
3477 static gimple *
3478 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3479 stmt_vec_info stmt_vinfo)
3481 tree itype = TREE_TYPE (op);
3482 machine_mode mode = TYPE_MODE (itype);
3483 struct algorithm alg;
3484 mult_variant variant;
3485 if (!tree_fits_shwi_p (val))
3486 return NULL;
3488 /* Multiplication synthesis by shifts, adds and subs can introduce
3489 signed overflow where the original operation didn't. Perform the
3490 operations on an unsigned type and cast back to avoid this.
3491 In the future we may want to relax this for synthesis algorithms
3492 that we can prove do not cause unexpected overflow. */
3493 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3495 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3496 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3497 if (!vectype)
3498 return NULL;
3500 /* Targets that don't support vector shifts but support vector additions
3501 can synthesize shifts that way. */
3502 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3504 HOST_WIDE_INT hwval = tree_to_shwi (val);
3505 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3506 The vectorizer's benefit analysis will decide whether it's beneficial
3507 to do this. */
3508 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3509 ? TYPE_MODE (vectype) : mode,
3510 hwval, &alg, &variant, MAX_COST);
3511 if (!possible)
3512 return NULL;
3514 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3515 return NULL;
3517 tree accumulator;
3519 /* Clear out the sequence of statements so we can populate it below. */
3520 gimple *stmt = NULL;
3522 if (cast_to_unsigned_p)
3524 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3525 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3526 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3527 op = tmp_op;
3530 if (alg.op[0] == alg_zero)
3531 accumulator = build_int_cst (multtype, 0);
3532 else
3533 accumulator = op;
3535 bool needs_fixup = (variant == negate_variant)
3536 || (variant == add_variant);
3538 for (int i = 1; i < alg.ops; i++)
3540 tree shft_log = build_int_cst (multtype, alg.log[i]);
3541 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3542 tree tmp_var = NULL_TREE;
3544 switch (alg.op[i])
3546 case alg_shift:
3547 if (synth_shift_p)
3548 stmt
3549 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3550 alg.log[i], stmt_vinfo);
3551 else
3552 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3553 shft_log);
3554 break;
3555 case alg_add_t_m2:
3556 tmp_var
3557 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3558 stmt_vinfo, synth_shift_p);
3559 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3560 tmp_var);
3561 break;
3562 case alg_sub_t_m2:
3563 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3564 shft_log, stmt_vinfo,
3565 synth_shift_p);
3566 /* In some algorithms the first step involves zeroing the
3567 accumulator. If subtracting from such an accumulator,
3568 just emit the negation directly. */
3569 if (integer_zerop (accumulator))
3570 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3571 else
3572 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3573 tmp_var);
3574 break;
3575 case alg_add_t2_m:
3576 tmp_var
3577 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3578 shft_log, stmt_vinfo, synth_shift_p);
3579 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3580 break;
3581 case alg_sub_t2_m:
3582 tmp_var
3583 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3584 shft_log, stmt_vinfo, synth_shift_p);
3585 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3586 break;
3587 case alg_add_factor:
3588 tmp_var
3589 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3590 shft_log, stmt_vinfo, synth_shift_p);
3591 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3592 tmp_var);
3593 break;
3594 case alg_sub_factor:
3595 tmp_var
3596 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3597 shft_log, stmt_vinfo, synth_shift_p);
3598 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3599 accumulator);
3600 break;
3601 default:
3602 gcc_unreachable ();
3604 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3605 but rather return it directly. */
3607 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3608 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3609 accumulator = accum_tmp;
3611 if (variant == negate_variant)
3613 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3614 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3615 accumulator = accum_tmp;
3616 if (cast_to_unsigned_p)
3617 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3619 else if (variant == add_variant)
3621 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3622 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3623 accumulator = accum_tmp;
3624 if (cast_to_unsigned_p)
3625 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3627 /* Move back to a signed type if needed. */
3628 if (cast_to_unsigned_p)
3630 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3631 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3634 return stmt;
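/* As an illustration of the synthesis performed above (the exact steps are
   whatever choose_mult_variant selects for the target and constant, so this
   particular sequence is only one possibility): a multiplication such as
   x * 10 can be synthesized as

     t1 = x << 2;
     t2 = t1 + x;         (x * 5)
     result = t2 << 1;    (x * 10)

   with each shift itself expanded into additions when SYNTH_SHIFT_P, and
   the whole computation performed in an unsigned type when the original
   type does not have wrapping overflow. */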
3637 /* Detect multiplication by a constant and convert it into a sequence of
3638 shifts, additions, subtractions and negations. We reuse the
3639 choose_mult_variant algorithms from expmed.cc.
3641 Input/Output:
3643 STMT_VINFO: The stmt from which the pattern search begins,
3644 i.e. the mult stmt.
3646 Output:
3648 * TYPE_OUT: The type of the output of this pattern.
3650 * Return value: A new stmt that will be used to replace
3651 the multiplication. */
3653 static gimple *
3654 vect_recog_mult_pattern (vec_info *vinfo,
3655 stmt_vec_info stmt_vinfo, tree *type_out)
3657 gimple *last_stmt = stmt_vinfo->stmt;
3658 tree oprnd0, oprnd1, vectype, itype;
3659 gimple *pattern_stmt;
3661 if (!is_gimple_assign (last_stmt))
3662 return NULL;
3664 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3665 return NULL;
3667 oprnd0 = gimple_assign_rhs1 (last_stmt);
3668 oprnd1 = gimple_assign_rhs2 (last_stmt);
3669 itype = TREE_TYPE (oprnd0);
3671 if (TREE_CODE (oprnd0) != SSA_NAME
3672 || TREE_CODE (oprnd1) != INTEGER_CST
3673 || !INTEGRAL_TYPE_P (itype)
3674 || !type_has_mode_precision_p (itype))
3675 return NULL;
3677 vectype = get_vectype_for_scalar_type (vinfo, itype);
3678 if (vectype == NULL_TREE)
3679 return NULL;
3681 /* If the target can handle vectorized multiplication natively,
3682 don't attempt to optimize this. */
3683 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3684 if (mul_optab != unknown_optab)
3686 machine_mode vec_mode = TYPE_MODE (vectype);
3687 int icode = (int) optab_handler (mul_optab, vec_mode);
3688 if (icode != CODE_FOR_nothing)
3689 return NULL;
3692 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3693 oprnd0, oprnd1, stmt_vinfo);
3694 if (!pattern_stmt)
3695 return NULL;
3697 /* Pattern detected. */
3698 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3700 *type_out = vectype;
3702 return pattern_stmt;
3705 /* Detect a signed division by a constant that wouldn't otherwise
3706 be vectorized:
3708 type a_t, b_t;
3710 S1 a_t = b_t / N;
3712 where type 'type' is an integral type and N is a constant.
3714 Similarly handle modulo by a constant:
3716 S4 a_t = b_t % N;
3718 Input/Output:
3720 * STMT_VINFO: The stmt from which the pattern search begins,
3721 i.e. the division stmt. S1 is replaced, if N is a power of two
3722 constant and type is signed, by:
3723 S3 y_t = b_t < 0 ? N - 1 : 0;
3724 S2 x_t = b_t + y_t;
3725 S1' a_t = x_t >> log2 (N);
3727 S4 is replaced, if N is a power of two constant and type is
3728 signed, by (where *_T temporaries have unsigned type):
3729 S9 y_T = b_t < 0 ? -1U : 0U;
3730 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3731 S7 z_t = (type) z_T;
3732 S6 w_t = b_t + z_t;
3733 S5 x_t = w_t & (N - 1);
3734 S4' a_t = x_t - z_t;
3736 Output:
3738 * TYPE_OUT: The type of the output of this pattern.
3740 * Return value: A new stmt that will be used to replace the division
3741 S1 or modulo S4 stmt. */
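/* A worked instance of the power-of-two case above (numbers chosen only for
   illustration): for int b_t and N == 4 the division replacement is

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   so b_t == -7 gives x_t == -4 and a_t == -1, matching the truncating
   division -7 / 4 == -1. */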
3743 static gimple *
3744 vect_recog_divmod_pattern (vec_info *vinfo,
3745 stmt_vec_info stmt_vinfo, tree *type_out)
3747 gimple *last_stmt = stmt_vinfo->stmt;
3748 tree oprnd0, oprnd1, vectype, itype, cond;
3749 gimple *pattern_stmt, *def_stmt;
3750 enum tree_code rhs_code;
3751 optab optab;
3752 tree q, cst;
3753 int dummy_int, prec;
3755 if (!is_gimple_assign (last_stmt))
3756 return NULL;
3758 rhs_code = gimple_assign_rhs_code (last_stmt);
3759 switch (rhs_code)
3761 case TRUNC_DIV_EXPR:
3762 case EXACT_DIV_EXPR:
3763 case TRUNC_MOD_EXPR:
3764 break;
3765 default:
3766 return NULL;
3769 oprnd0 = gimple_assign_rhs1 (last_stmt);
3770 oprnd1 = gimple_assign_rhs2 (last_stmt);
3771 itype = TREE_TYPE (oprnd0);
3772 if (TREE_CODE (oprnd0) != SSA_NAME
3773 || TREE_CODE (oprnd1) != INTEGER_CST
3774 || TREE_CODE (itype) != INTEGER_TYPE
3775 || !type_has_mode_precision_p (itype))
3776 return NULL;
3778 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3779 vectype = get_vectype_for_scalar_type (vinfo, itype);
3780 if (vectype == NULL_TREE)
3781 return NULL;
3783 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3785 /* If the target can handle vectorized division or modulo natively,
3786 don't attempt to optimize this, since native division is likely
3787 to give smaller code. */
3788 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3789 if (optab != unknown_optab)
3791 machine_mode vec_mode = TYPE_MODE (vectype);
3792 int icode = (int) optab_handler (optab, vec_mode);
3793 if (icode != CODE_FOR_nothing)
3794 return NULL;
3798 prec = TYPE_PRECISION (itype);
3799 if (integer_pow2p (oprnd1))
3801 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3802 return NULL;
3804 /* Pattern detected. */
3805 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3807 *type_out = vectype;
3809 /* Check if the target supports this internal function. */
3810 internal_fn ifn = IFN_DIV_POW2;
3811 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3813 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3815 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3816 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3817 gimple_call_set_lhs (div_stmt, var_div);
3819 if (rhs_code == TRUNC_MOD_EXPR)
3821 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3822 def_stmt
3823 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3824 LSHIFT_EXPR, var_div, shift);
3825 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3826 pattern_stmt
3827 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3828 MINUS_EXPR, oprnd0,
3829 gimple_assign_lhs (def_stmt));
3831 else
3832 pattern_stmt = div_stmt;
3833 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3835 return pattern_stmt;
3838 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3839 build_int_cst (itype, 0));
3840 if (rhs_code == TRUNC_DIV_EXPR
3841 || rhs_code == EXACT_DIV_EXPR)
3843 tree var = vect_recog_temp_ssa_var (itype, NULL);
3844 tree shift;
3845 def_stmt
3846 = gimple_build_assign (var, COND_EXPR, cond,
3847 fold_build2 (MINUS_EXPR, itype, oprnd1,
3848 build_int_cst (itype, 1)),
3849 build_int_cst (itype, 0));
3850 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3851 var = vect_recog_temp_ssa_var (itype, NULL);
3852 def_stmt
3853 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3854 gimple_assign_lhs (def_stmt));
3855 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3857 shift = build_int_cst (itype, tree_log2 (oprnd1));
3858 pattern_stmt
3859 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3860 RSHIFT_EXPR, var, shift);
3862 else
3864 tree signmask;
3865 if (compare_tree_int (oprnd1, 2) == 0)
3867 signmask = vect_recog_temp_ssa_var (itype, NULL);
3868 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3869 build_int_cst (itype, 1),
3870 build_int_cst (itype, 0));
3871 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3873 else
3875 tree utype
3876 = build_nonstandard_integer_type (prec, 1);
3877 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3878 tree shift
3879 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3880 - tree_log2 (oprnd1));
3881 tree var = vect_recog_temp_ssa_var (utype, NULL);
3883 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3884 build_int_cst (utype, -1),
3885 build_int_cst (utype, 0));
3886 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3887 var = vect_recog_temp_ssa_var (utype, NULL);
3888 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3889 gimple_assign_lhs (def_stmt),
3890 shift);
3891 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3892 signmask = vect_recog_temp_ssa_var (itype, NULL);
3893 def_stmt
3894 = gimple_build_assign (signmask, NOP_EXPR, var);
3895 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3897 def_stmt
3898 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3899 PLUS_EXPR, oprnd0, signmask);
3900 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3901 def_stmt
3902 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3903 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3904 fold_build2 (MINUS_EXPR, itype, oprnd1,
3905 build_int_cst (itype, 1)));
3906 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3908 pattern_stmt
3909 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3910 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3911 signmask);
3914 return pattern_stmt;
3916 else if ((cst = uniform_integer_cst_p (oprnd1))
3917 && targetm.vectorize.can_special_div_by_const (rhs_code, vectype,
3918 wi::to_wide (cst),
3919 NULL, NULL_RTX,
3920 NULL_RTX))
3922 return NULL;
3925 if (prec > HOST_BITS_PER_WIDE_INT
3926 || integer_zerop (oprnd1))
3927 return NULL;
3929 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3930 return NULL;
3932 if (TYPE_UNSIGNED (itype))
3934 unsigned HOST_WIDE_INT mh, ml;
3935 int pre_shift, post_shift;
3936 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3937 & GET_MODE_MASK (itype_mode));
3938 tree t1, t2, t3, t4;
3940 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3941 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3942 return NULL;
3944 /* Find a suitable multiplier and right shift count
3945 instead of multiplying by D. */
3946 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3948 /* If the suggested multiplier is more than SIZE bits, we can do better
3949 for even divisors, using an initial right shift. */
3950 if (mh != 0 && (d & 1) == 0)
3952 pre_shift = ctz_or_zero (d);
3953 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3954 &ml, &post_shift, &dummy_int);
3955 gcc_assert (!mh);
3957 else
3958 pre_shift = 0;
3960 if (mh != 0)
3962 if (post_shift - 1 >= prec)
3963 return NULL;
3965 /* t1 = oprnd0 h* ml;
3966 t2 = oprnd0 - t1;
3967 t3 = t2 >> 1;
3968 t4 = t1 + t3;
3969 q = t4 >> (post_shift - 1); */
3970 t1 = vect_recog_temp_ssa_var (itype, NULL);
3971 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3972 build_int_cst (itype, ml));
3973 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3975 t2 = vect_recog_temp_ssa_var (itype, NULL);
3976 def_stmt
3977 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3978 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3980 t3 = vect_recog_temp_ssa_var (itype, NULL);
3981 def_stmt
3982 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3983 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3985 t4 = vect_recog_temp_ssa_var (itype, NULL);
3986 def_stmt
3987 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3989 if (post_shift != 1)
3991 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3993 q = vect_recog_temp_ssa_var (itype, NULL);
3994 pattern_stmt
3995 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3996 build_int_cst (itype, post_shift - 1));
3998 else
4000 q = t4;
4001 pattern_stmt = def_stmt;
4004 else
4006 if (pre_shift >= prec || post_shift >= prec)
4007 return NULL;
4009 /* t1 = oprnd0 >> pre_shift;
4010 t2 = t1 h* ml;
4011 q = t2 >> post_shift; */
4012 if (pre_shift)
4014 t1 = vect_recog_temp_ssa_var (itype, NULL);
4015 def_stmt
4016 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4017 build_int_cst (NULL, pre_shift));
4018 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4020 else
4021 t1 = oprnd0;
4023 t2 = vect_recog_temp_ssa_var (itype, NULL);
4024 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4025 build_int_cst (itype, ml));
4027 if (post_shift)
4029 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4031 q = vect_recog_temp_ssa_var (itype, NULL);
4032 def_stmt
4033 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4034 build_int_cst (itype, post_shift));
4036 else
4037 q = t2;
4039 pattern_stmt = def_stmt;
4042 else
4044 unsigned HOST_WIDE_INT ml;
4045 int post_shift;
4046 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4047 unsigned HOST_WIDE_INT abs_d;
4048 bool add = false;
4049 tree t1, t2, t3, t4;
4051 /* Give up for -1. */
4052 if (d == -1)
4053 return NULL;
4055 /* Since d might be INT_MIN, we have to cast to
4056 unsigned HOST_WIDE_INT before negating to avoid
4057 undefined signed overflow. */
4058 abs_d = (d >= 0
4059 ? (unsigned HOST_WIDE_INT) d
4060 : - (unsigned HOST_WIDE_INT) d);
4062 /* n rem d = n rem -d */
4063 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4065 d = abs_d;
4066 oprnd1 = build_int_cst (itype, abs_d);
4068 if (HOST_BITS_PER_WIDE_INT >= prec
4069 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4070 /* This case is not handled correctly below. */
4071 return NULL;
4073 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4074 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4076 add = true;
4077 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4079 if (post_shift >= prec)
4080 return NULL;
4082 /* t1 = oprnd0 h* ml; */
4083 t1 = vect_recog_temp_ssa_var (itype, NULL);
4084 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4085 build_int_cst (itype, ml));
4087 if (add)
4089 /* t2 = t1 + oprnd0; */
4090 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4091 t2 = vect_recog_temp_ssa_var (itype, NULL);
4092 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4094 else
4095 t2 = t1;
4097 if (post_shift)
4099 /* t3 = t2 >> post_shift; */
4100 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4101 t3 = vect_recog_temp_ssa_var (itype, NULL);
4102 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4103 build_int_cst (itype, post_shift));
4105 else
4106 t3 = t2;
4108 int msb = 1;
4109 value_range r;
4110 get_range_query (cfun)->range_of_expr (r, oprnd0);
4111 if (r.kind () == VR_RANGE)
4113 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4114 msb = 0;
4115 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4116 msb = -1;
4119 if (msb == 0 && d >= 0)
4121 /* q = t3; */
4122 q = t3;
4123 pattern_stmt = def_stmt;
4125 else
4127 /* t4 = oprnd0 >> (prec - 1);
4128 or if we know from VRP that oprnd0 >= 0
4129 t4 = 0;
4130 or if we know from VRP that oprnd0 < 0
4131 t4 = -1; */
4132 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4133 t4 = vect_recog_temp_ssa_var (itype, NULL);
4134 if (msb != 1)
4135 def_stmt = gimple_build_assign (t4, INTEGER_CST,
4136 build_int_cst (itype, msb));
4137 else
4138 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
4139 build_int_cst (itype, prec - 1));
4140 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4142 /* q = t3 - t4; or q = t4 - t3; */
4143 q = vect_recog_temp_ssa_var (itype, NULL);
4144 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
4145 d < 0 ? t3 : t4);
4149 if (rhs_code == TRUNC_MOD_EXPR)
4151 tree r, t1;
4153 /* We divided. Now finish by:
4154 t1 = q * oprnd1;
4155 r = oprnd0 - t1; */
4156 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
4158 t1 = vect_recog_temp_ssa_var (itype, NULL);
4159 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
4160 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4162 r = vect_recog_temp_ssa_var (itype, NULL);
4163 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
4166 /* Pattern detected. */
4167 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4169 *type_out = vectype;
4170 return pattern_stmt;
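/* A sketch of the non-power-of-two path above for the unsigned case (the
   multiplier and shift counts come from choose_multiplier; the constants
   here are the classic values for dividing a 32-bit unsigned value by 7
   and are shown only as an illustration):

     t1 = oprnd0 h* 0x24924925;
     t2 = oprnd0 - t1;
     t3 = t2 >> 1;
     t4 = t1 + t3;
     q = t4 >> 2;

   which corresponds to the "mh != 0" case; divisors whose multiplier fits
   in the precision instead use the pre-shift, highpart-multiply and
   post-shift form. */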
4173 /* Function vect_recog_mixed_size_cond_pattern
4175 Try to find the following pattern:
4177 type x_t, y_t;
4178 TYPE a_T, b_T, c_T;
4179 loop:
4180 S1 a_T = x_t CMP y_t ? b_T : c_T;
4182 where type 'TYPE' is an integral type which has a different size
4183 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
4184 than 'type', the constants need to fit into an integer type
4185 with the same width as 'type') or results of conversion from 'type'.
4187 Input:
4189 * STMT_VINFO: The stmt from which the pattern search begins.
4191 Output:
4193 * TYPE_OUT: The type of the output of this pattern.
4195 * Return value: A new stmt that will be used to replace the pattern.
4196 Additionally a def_stmt is added.
4198 a_it = x_t CMP y_t ? b_it : c_it;
4199 a_T = (TYPE) a_it; */
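/* A concrete instance of the pattern above (types and constants picked only
   for illustration): with int x_t, y_t and a long long result,

     a_ll = x_t < y_t ? 1LL : 2LL;

   is rewritten so the COND_EXPR is evaluated in an int-sized type first and
   only its result is widened:

     a_it = x_t < y_t ? 1 : 2;
     a_ll = (long long) a_it; */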
4201 static gimple *
4202 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
4203 stmt_vec_info stmt_vinfo, tree *type_out)
4205 gimple *last_stmt = stmt_vinfo->stmt;
4206 tree cond_expr, then_clause, else_clause;
4207 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
4208 gimple *pattern_stmt, *def_stmt;
4209 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
4210 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
4211 bool promotion;
4212 tree comp_scalar_type;
4214 if (!is_gimple_assign (last_stmt)
4215 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
4216 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
4217 return NULL;
4219 cond_expr = gimple_assign_rhs1 (last_stmt);
4220 then_clause = gimple_assign_rhs2 (last_stmt);
4221 else_clause = gimple_assign_rhs3 (last_stmt);
4223 if (!COMPARISON_CLASS_P (cond_expr))
4224 return NULL;
4226 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
4227 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
4228 if (comp_vectype == NULL_TREE)
4229 return NULL;
4231 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
4232 if (types_compatible_p (type, comp_scalar_type)
4233 || ((TREE_CODE (then_clause) != INTEGER_CST
4234 || TREE_CODE (else_clause) != INTEGER_CST)
4235 && !INTEGRAL_TYPE_P (comp_scalar_type))
4236 || !INTEGRAL_TYPE_P (type))
4237 return NULL;
4239 if ((TREE_CODE (then_clause) != INTEGER_CST
4240 && !type_conversion_p (vinfo, then_clause, false,
4241 &orig_type0, &def_stmt0, &promotion))
4242 || (TREE_CODE (else_clause) != INTEGER_CST
4243 && !type_conversion_p (vinfo, else_clause, false,
4244 &orig_type1, &def_stmt1, &promotion)))
4245 return NULL;
4247 if (orig_type0 && orig_type1
4248 && !types_compatible_p (orig_type0, orig_type1))
4249 return NULL;
4251 if (orig_type0)
4253 if (!types_compatible_p (orig_type0, comp_scalar_type))
4254 return NULL;
4255 then_clause = gimple_assign_rhs1 (def_stmt0);
4256 itype = orig_type0;
4259 if (orig_type1)
4261 if (!types_compatible_p (orig_type1, comp_scalar_type))
4262 return NULL;
4263 else_clause = gimple_assign_rhs1 (def_stmt1);
4264 itype = orig_type1;
4268 HOST_WIDE_INT cmp_mode_size
4269 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
4271 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
4272 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
4273 return NULL;
4275 vectype = get_vectype_for_scalar_type (vinfo, type);
4276 if (vectype == NULL_TREE)
4277 return NULL;
4279 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
4280 return NULL;
4282 if (itype == NULL_TREE)
4283 itype = build_nonstandard_integer_type (cmp_mode_size,
4284 TYPE_UNSIGNED (type));
4286 if (itype == NULL_TREE
4287 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
4288 return NULL;
4290 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4291 if (vecitype == NULL_TREE)
4292 return NULL;
4294 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
4295 return NULL;
4297 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
4299 if ((TREE_CODE (then_clause) == INTEGER_CST
4300 && !int_fits_type_p (then_clause, itype))
4301 || (TREE_CODE (else_clause) == INTEGER_CST
4302 && !int_fits_type_p (else_clause, itype)))
4303 return NULL;
4306 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4307 COND_EXPR, unshare_expr (cond_expr),
4308 fold_convert (itype, then_clause),
4309 fold_convert (itype, else_clause));
4310 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4311 NOP_EXPR, gimple_assign_lhs (def_stmt));
4313 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
4314 *type_out = vectype;
4316 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
4318 return pattern_stmt;
4322 /* Helper function of vect_recog_bool_pattern. Called recursively; return
4323 true if bool VAR can and should be optimized that way. Assume it shouldn't
4324 be in case it's the result of a comparison which can be directly vectorized
4325 into a vector comparison. Fill in STMTS with all stmts visited during the
4326 walk. */
4328 static bool
4329 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
4331 tree rhs1;
4332 enum tree_code rhs_code;
4334 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4335 if (!def_stmt_info)
4336 return false;
4338 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
4339 if (!def_stmt)
4340 return false;
4342 if (stmts.contains (def_stmt))
4343 return true;
4345 rhs1 = gimple_assign_rhs1 (def_stmt);
4346 rhs_code = gimple_assign_rhs_code (def_stmt);
4347 switch (rhs_code)
4349 case SSA_NAME:
4350 if (! check_bool_pattern (rhs1, vinfo, stmts))
4351 return false;
4352 break;
4354 CASE_CONVERT:
4355 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4356 return false;
4357 if (! check_bool_pattern (rhs1, vinfo, stmts))
4358 return false;
4359 break;
4361 case BIT_NOT_EXPR:
4362 if (! check_bool_pattern (rhs1, vinfo, stmts))
4363 return false;
4364 break;
4366 case BIT_AND_EXPR:
4367 case BIT_IOR_EXPR:
4368 case BIT_XOR_EXPR:
4369 if (! check_bool_pattern (rhs1, vinfo, stmts)
4370 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4371 return false;
4372 break;
4374 default:
4375 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4377 tree vecitype, comp_vectype;
4379 /* If the comparison can throw, then is_gimple_condexpr will be
4380 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4381 if (stmt_could_throw_p (cfun, def_stmt))
4382 return false;
4384 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4385 if (comp_vectype == NULL_TREE)
4386 return false;
4388 tree mask_type = get_mask_type_for_scalar_type (vinfo,
4389 TREE_TYPE (rhs1));
4390 if (mask_type
4391 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4392 return false;
4394 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4396 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4397 tree itype
4398 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4399 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4400 if (vecitype == NULL_TREE)
4401 return false;
4403 else
4404 vecitype = comp_vectype;
4405 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4406 return false;
4408 else
4409 return false;
4410 break;
4413 bool res = stmts.add (def_stmt);
4414 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4415 gcc_assert (!res);
4417 return true;
4421 /* Helper function of adjust_bool_pattern. Add a cast of VAR (defined by
4422 SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast statement to
4423 STMT_INFO's pattern definition sequence. Return the cast's result. */
4425 static tree
4426 adjust_bool_pattern_cast (vec_info *vinfo,
4427 tree type, tree var, stmt_vec_info stmt_info)
4429 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4430 NOP_EXPR, var);
4431 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4432 get_vectype_for_scalar_type (vinfo, type));
4433 return gimple_assign_lhs (cast_stmt);
4436 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4437 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4438 type, OUT_TYPE is the desired final integer type of the whole pattern.
4439 STMT_INFO is the info of the pattern root and is where pattern stmts should
4440 be associated with. DEFS is a map of pattern defs. */
4442 static void
4443 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4444 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4446 gimple *stmt = SSA_NAME_DEF_STMT (var);
4447 enum tree_code rhs_code, def_rhs_code;
4448 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4449 location_t loc;
4450 gimple *pattern_stmt, *def_stmt;
4451 tree trueval = NULL_TREE;
4453 rhs1 = gimple_assign_rhs1 (stmt);
4454 rhs2 = gimple_assign_rhs2 (stmt);
4455 rhs_code = gimple_assign_rhs_code (stmt);
4456 loc = gimple_location (stmt);
4457 switch (rhs_code)
4459 case SSA_NAME:
4460 CASE_CONVERT:
4461 irhs1 = *defs.get (rhs1);
4462 itype = TREE_TYPE (irhs1);
4463 pattern_stmt
4464 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4465 SSA_NAME, irhs1);
4466 break;
4468 case BIT_NOT_EXPR:
4469 irhs1 = *defs.get (rhs1);
4470 itype = TREE_TYPE (irhs1);
4471 pattern_stmt
4472 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4473 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4474 break;
4476 case BIT_AND_EXPR:
4477 /* Try to optimize x = y & (a < b ? 1 : 0); into
4478 x = (a < b ? y : 0);
4480 E.g. for:
4481 bool a_b, b_b, c_b;
4482 TYPE d_T;
4484 S1 a_b = x1 CMP1 y1;
4485 S2 b_b = x2 CMP2 y2;
4486 S3 c_b = a_b & b_b;
4487 S4 d_T = (TYPE) c_b;
4489 we would normally emit:
4491 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4492 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4493 S3' c_T = a_T & b_T;
4494 S4' d_T = c_T;
4496 but we can save one stmt by using the
4497 result of one of the COND_EXPRs in the other COND_EXPR and leave
4498 BIT_AND_EXPR stmt out:
4500 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4501 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4502 S4' d_T = c_T;
4504 At least when VEC_COND_EXPR is implemented using masks,
4505 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
4506 computes the comparison mask and ANDs it, in one case with an
4507 all-ones vector, in the other case with a vector register.
4508 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
4509 often more expensive. */
4510 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4511 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4512 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4514 irhs1 = *defs.get (rhs1);
4515 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4516 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4517 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4519 rhs_code = def_rhs_code;
4520 rhs1 = def_rhs1;
4521 rhs2 = gimple_assign_rhs2 (def_stmt);
4522 trueval = irhs1;
4523 goto do_compare;
4525 else
4526 irhs2 = *defs.get (rhs2);
4527 goto and_ior_xor;
4529 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4530 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4531 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4533 irhs2 = *defs.get (rhs2);
4534 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4535 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4536 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4538 rhs_code = def_rhs_code;
4539 rhs1 = def_rhs1;
4540 rhs2 = gimple_assign_rhs2 (def_stmt);
4541 trueval = irhs2;
4542 goto do_compare;
4544 else
4545 irhs1 = *defs.get (rhs1);
4546 goto and_ior_xor;
4548 /* FALLTHRU */
4549 case BIT_IOR_EXPR:
4550 case BIT_XOR_EXPR:
4551 irhs1 = *defs.get (rhs1);
4552 irhs2 = *defs.get (rhs2);
4553 and_ior_xor:
4554 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4555 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4557 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4558 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4559 int out_prec = TYPE_PRECISION (out_type);
4560 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4561 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4562 stmt_info);
4563 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4564 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4565 stmt_info);
4566 else
4568 irhs1 = adjust_bool_pattern_cast (vinfo,
4569 out_type, irhs1, stmt_info);
4570 irhs2 = adjust_bool_pattern_cast (vinfo,
4571 out_type, irhs2, stmt_info);
4574 itype = TREE_TYPE (irhs1);
4575 pattern_stmt
4576 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4577 rhs_code, irhs1, irhs2);
4578 break;
4580 default:
4581 do_compare:
4582 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4583 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4584 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4585 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4586 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4588 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4589 itype
4590 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4592 else
4593 itype = TREE_TYPE (rhs1);
4594 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4595 if (trueval == NULL_TREE)
4596 trueval = build_int_cst (itype, 1);
4597 else
4598 gcc_checking_assert (useless_type_conversion_p (itype,
4599 TREE_TYPE (trueval)));
4600 pattern_stmt
4601 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4602 COND_EXPR, cond_expr, trueval,
4603 build_int_cst (itype, 0));
4604 break;
4607 gimple_set_location (pattern_stmt, loc);
4608 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4609 get_vectype_for_scalar_type (vinfo, itype));
4610 defs.put (var, gimple_assign_lhs (pattern_stmt));
4613 /* Comparison function to qsort a vector of gimple stmts by UID. */
4615 static int
4616 sort_after_uid (const void *p1, const void *p2)
4618 const gimple *stmt1 = *(const gimple * const *)p1;
4619 const gimple *stmt2 = *(const gimple * const *)p2;
4620 return gimple_uid (stmt1) - gimple_uid (stmt2);
4623 /* Create pattern stmts for all stmts participating in the bool pattern
4624 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4625 OUT_TYPE. Return the def of the pattern root. */
4627 static tree
4628 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4629 tree out_type, stmt_vec_info stmt_info)
4631 /* Gather original stmts in the bool pattern in their order of appearance
4632 in the IL. */
4633 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4634 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4635 i != bool_stmt_set.end (); ++i)
4636 bool_stmts.quick_push (*i);
4637 bool_stmts.qsort (sort_after_uid);
4639 /* Now process them in that order, producing pattern stmts. */
4640 hash_map <tree, tree> defs;
4641 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4642 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4643 out_type, stmt_info, defs);
4645 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4646 gimple *pattern_stmt
4647 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4648 return gimple_assign_lhs (pattern_stmt);
4651 /* Return the proper type for converting bool VAR into
4652 an integer value or NULL_TREE if no such type exists.
4653 The type is chosen so that the converted value has the
4654 same number of elements as VAR's vector type. */
4656 static tree
4657 integer_type_for_mask (tree var, vec_info *vinfo)
4659 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4660 return NULL_TREE;
4662 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4663 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4664 return NULL_TREE;
4666 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4669 /* Function vect_recog_bool_pattern
4671 Try to find pattern like following:
4673 bool a_b, b_b, c_b, d_b, e_b;
4674 TYPE f_T;
4675 loop:
4676 S1 a_b = x1 CMP1 y1;
4677 S2 b_b = x2 CMP2 y2;
4678 S3 c_b = a_b & b_b;
4679 S4 d_b = x3 CMP3 y3;
4680 S5 e_b = c_b | d_b;
4681 S6 f_T = (TYPE) e_b;
4683 where type 'TYPE' is an integral type. Or a similar pattern
4684 ending in
4686 S6 f_Y = e_b ? r_Y : s_Y;
4688 as results from if-conversion of a complex condition.
4690 Input:
4692 * STMT_VINFO: The stmt at the end from which the pattern
4693 search begins, i.e. cast of a bool to
4694 an integer type.
4696 Output:
4698 * TYPE_OUT: The type of the output of this pattern.
4700 * Return value: A new stmt that will be used to replace the pattern.
4702 Assuming the size of TYPE is the same as the size of all comparisons
4703 (otherwise some casts would be added where needed), for the above
4704 sequence we create related pattern stmts:
4705 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4706 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4707 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4708 S5' e_T = c_T | d_T;
4709 S6' f_T = e_T;
4711 Instead of the above S3' we could emit:
4712 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4713 S3' c_T = a_T | b_T;
4714 but the above is more efficient. */
4716 static gimple *
4717 vect_recog_bool_pattern (vec_info *vinfo,
4718 stmt_vec_info stmt_vinfo, tree *type_out)
4720 gimple *last_stmt = stmt_vinfo->stmt;
4721 enum tree_code rhs_code;
4722 tree var, lhs, rhs, vectype;
4723 gimple *pattern_stmt;
4725 if (!is_gimple_assign (last_stmt))
4726 return NULL;
4728 var = gimple_assign_rhs1 (last_stmt);
4729 lhs = gimple_assign_lhs (last_stmt);
4730 rhs_code = gimple_assign_rhs_code (last_stmt);
4732 if (rhs_code == VIEW_CONVERT_EXPR)
4733 var = TREE_OPERAND (var, 0);
4735 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4736 return NULL;
4738 hash_set<gimple *> bool_stmts;
4740 if (CONVERT_EXPR_CODE_P (rhs_code)
4741 || rhs_code == VIEW_CONVERT_EXPR)
4743 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4744 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4745 return NULL;
4746 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4748 if (check_bool_pattern (var, vinfo, bool_stmts))
4750 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4751 TREE_TYPE (lhs), stmt_vinfo);
4752 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4753 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4754 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4755 else
4756 pattern_stmt
4757 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4759 else
4761 tree type = integer_type_for_mask (var, vinfo);
4762 tree cst0, cst1, tmp;
4764 if (!type)
4765 return NULL;
4767 /* We may directly use the cond with a narrowed type to avoid
4768 multiple cond exprs with subsequent result packing, and
4769 instead perform a single cond with a packed mask. In case
4770 of widening it is better to emit the cond first and then
4771 extract the results. */
4772 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4773 type = TREE_TYPE (lhs);
4775 cst0 = build_int_cst (type, 0);
4776 cst1 = build_int_cst (type, 1);
4777 tmp = vect_recog_temp_ssa_var (type, NULL);
4778 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4780 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4782 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4783 append_pattern_def_seq (vinfo, stmt_vinfo,
4784 pattern_stmt, new_vectype);
4786 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4787 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4791 *type_out = vectype;
4792 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4794 return pattern_stmt;
4796 else if (rhs_code == COND_EXPR
4797 && TREE_CODE (var) == SSA_NAME)
4799 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4800 if (vectype == NULL_TREE)
4801 return NULL;
4803 /* Build a scalar type for the boolean result that when
4804 vectorized matches the vector type of the result in
4805 size and number of elements. */
4806 unsigned prec
4807 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4808 TYPE_VECTOR_SUBPARTS (vectype));
4810 tree type
4811 = build_nonstandard_integer_type (prec,
4812 TYPE_UNSIGNED (TREE_TYPE (var)));
4813 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4814 return NULL;
4816 if (check_bool_pattern (var, vinfo, bool_stmts))
4817 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4818 else if (integer_type_for_mask (var, vinfo))
4819 return NULL;
4821 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4822 pattern_stmt
4823 = gimple_build_assign (lhs, COND_EXPR,
4824 build2 (NE_EXPR, boolean_type_node,
4825 var, build_int_cst (TREE_TYPE (var), 0)),
4826 gimple_assign_rhs2 (last_stmt),
4827 gimple_assign_rhs3 (last_stmt));
4828 *type_out = vectype;
4829 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4831 return pattern_stmt;
4833 else if (rhs_code == SSA_NAME
4834 && STMT_VINFO_DATA_REF (stmt_vinfo))
4836 stmt_vec_info pattern_stmt_info;
4837 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4838 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4839 return NULL;
4841 if (check_bool_pattern (var, vinfo, bool_stmts))
4842 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4843 TREE_TYPE (vectype), stmt_vinfo);
4844 else
4846 tree type = integer_type_for_mask (var, vinfo);
4847 tree cst0, cst1, new_vectype;
4849 if (!type)
4850 return NULL;
4852 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4853 type = TREE_TYPE (vectype);
4855 cst0 = build_int_cst (type, 0);
4856 cst1 = build_int_cst (type, 1);
4857 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4859 rhs = vect_recog_temp_ssa_var (type, NULL);
4860 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4861 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4864 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4865 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4867 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4868 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4869 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4870 rhs = rhs2;
4872 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4873 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4874 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4875 *type_out = vectype;
4876 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4878 return pattern_stmt;
4880 else
4881 return NULL;
4885 /* A helper for vect_recog_mask_conversion_pattern. Build
4886 conversion of MASK to a type suitable for masking VECTYPE.
4887 The built statement gets the required vectype and is appended to
4888 STMT_VINFO's pattern definition sequence.
4890 Return the converted mask. */
4892 static tree
4893 build_mask_conversion (vec_info *vinfo,
4894 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4896 gimple *stmt;
4897 tree masktype, tmp;
4899 masktype = truth_type_for (vectype);
4900 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4901 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4902 append_pattern_def_seq (vinfo, stmt_vinfo,
4903 stmt, masktype, TREE_TYPE (vectype));
4905 return tmp;
4909 /* Function vect_recog_mask_conversion_pattern
4911 Try to find statements which require boolean type
4912 conversion. Additional conversion statements are
4913 added to handle such cases. For example:
4915 bool m_1, m_2, m_3;
4916 int i_4, i_5;
4917 double d_6, d_7;
4918 char c_1, c_2, c_3;
4920 S1 m_1 = i_4 > i_5;
4921 S2 m_2 = d_6 < d_7;
4922 S3 m_3 = m_1 & m_2;
4923 S4 c_1 = m_3 ? c_2 : c_3;
4925 Will be transformed into:
4927 S1 m_1 = i_4 > i_5;
4928 S2 m_2 = d_6 < d_7;
4929 S3'' m_2' = (_Bool[bitsize=32])m_2
4930 S3' m_3' = m_1 & m_2';
4931 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4932 S4' c_1' = m_3'' ? c_2 : c_3; */
4934 static gimple *
4935 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4936 stmt_vec_info stmt_vinfo, tree *type_out)
4938 gimple *last_stmt = stmt_vinfo->stmt;
4939 enum tree_code rhs_code;
4940 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4941 tree vectype1, vectype2;
4942 stmt_vec_info pattern_stmt_info;
4943 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4944 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4946 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4947 if (is_gimple_call (last_stmt)
4948 && gimple_call_internal_p (last_stmt))
4950 gcall *pattern_stmt;
4952 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4953 int mask_argno = internal_fn_mask_index (ifn);
4954 if (mask_argno < 0)
4955 return NULL;
4957 bool store_p = internal_store_fn_p (ifn);
4958 if (store_p)
4960 int rhs_index = internal_fn_stored_value_index (ifn);
4961 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4962 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4964 else
4966 lhs = gimple_call_lhs (last_stmt);
4967 if (!lhs)
4968 return NULL;
4969 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4972 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4973 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4974 if (!mask_arg_type)
4975 return NULL;
4976 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4978 if (!vectype1 || !vectype2
4979 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4980 TYPE_VECTOR_SUBPARTS (vectype2)))
4981 return NULL;
4983 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4985 auto_vec<tree, 8> args;
4986 unsigned int nargs = gimple_call_num_args (last_stmt);
4987 args.safe_grow (nargs, true);
4988 for (unsigned int i = 0; i < nargs; ++i)
4989 args[i] = ((int) i == mask_argno
4990 ? tmp
4991 : gimple_call_arg (last_stmt, i));
4992 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4994 if (!store_p)
4996 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4997 gimple_call_set_lhs (pattern_stmt, lhs);
4999 gimple_call_set_nothrow (pattern_stmt, true);
5001 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5002 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5003 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5005 *type_out = vectype1;
5006 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5008 return pattern_stmt;
5011 if (!is_gimple_assign (last_stmt))
5012 return NULL;
5014 gimple *pattern_stmt;
5015 lhs = gimple_assign_lhs (last_stmt);
5016 rhs1 = gimple_assign_rhs1 (last_stmt);
5017 rhs_code = gimple_assign_rhs_code (last_stmt);
5019 /* Check for cond expression requiring mask conversion. */
5020 if (rhs_code == COND_EXPR)
5022 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5024 if (TREE_CODE (rhs1) == SSA_NAME)
5026 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5027 if (!rhs1_type)
5028 return NULL;
5030 else if (COMPARISON_CLASS_P (rhs1))
5032 /* Check whether we're comparing scalar booleans and (if so)
5033 whether a better mask type exists than the mask associated
5034 with boolean-sized elements. This avoids unnecessary packs
5035 and unpacks if the booleans are set from comparisons of
5036 wider types. E.g. in:
5038 int x1, x2, x3, x4, y1, y2;
5040 bool b1 = (x1 == x2);
5041 bool b2 = (x3 == x4);
5042 ... = b1 == b2 ? y1 : y2;
5044 it is better for b1 and b2 to use the mask type associated
5045 with int elements rather than bool (byte) elements. */
5046 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5047 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5048 if (!rhs1_op0 || !rhs1_op1)
5049 return NULL;
5050 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5051 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
5053 if (!rhs1_op0_type)
5054 rhs1_type = TREE_TYPE (rhs1_op0);
5055 else if (!rhs1_op1_type)
5056 rhs1_type = TREE_TYPE (rhs1_op1);
5057 else if (TYPE_PRECISION (rhs1_op0_type)
5058 != TYPE_PRECISION (rhs1_op1_type))
5060 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5061 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5062 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5063 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5064 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
5066 if (abs (tmp0) > abs (tmp1))
5067 rhs1_type = rhs1_op1_type;
5068 else
5069 rhs1_type = rhs1_op0_type;
5071 else
5072 rhs1_type = build_nonstandard_integer_type
5073 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
5075 else
5076 rhs1_type = rhs1_op0_type;
5078 else
5079 return NULL;
5081 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5083 if (!vectype1 || !vectype2)
5084 return NULL;
5086 /* Continue if a conversion is needed. Also continue if we have
5087 a comparison whose vector type would normally be different from
5088 VECTYPE2 when considered in isolation. In that case we'll
5089 replace the comparison with an SSA name (so that we can record
5090 its vector type) and behave as though the comparison was an SSA
5091 name from the outset. */
5092 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5093 TYPE_VECTOR_SUBPARTS (vectype2))
5094 && !rhs1_op0_type
5095 && !rhs1_op1_type)
5096 return NULL;
5098 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
5099 in place; we can handle it in vectorizable_condition. This avoids
5100 unnecessary promotion stmts and an increased vectorization factor. */
5101 if (COMPARISON_CLASS_P (rhs1)
5102 && INTEGRAL_TYPE_P (rhs1_type)
5103 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
5104 TYPE_VECTOR_SUBPARTS (vectype2)))
5106 enum vect_def_type dt;
5107 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
5108 && dt == vect_external_def
5109 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
5110 && (dt == vect_external_def
5111 || dt == vect_constant_def))
5113 tree wide_scalar_type = build_nonstandard_integer_type
5114 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
5115 tree vectype3 = get_vectype_for_scalar_type (vinfo,
5116 wide_scalar_type);
5117 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
5118 return NULL;
5122 /* If rhs1 is a comparison, we need to move it into a
5123 separate statement. */
5124 if (TREE_CODE (rhs1) != SSA_NAME)
5126 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
5127 if (rhs1_op0_type
5128 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
5129 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
5130 vectype2, stmt_vinfo);
5131 if (rhs1_op1_type
5132 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
5133 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
5134 vectype2, stmt_vinfo);
5135 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
5136 rhs1_op0, rhs1_op1);
5137 rhs1 = tmp;
5138 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
5139 rhs1_type);
5142 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
5143 TYPE_VECTOR_SUBPARTS (vectype2)))
5144 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5145 else
5146 tmp = rhs1;
5148 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5149 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
5150 gimple_assign_rhs2 (last_stmt),
5151 gimple_assign_rhs3 (last_stmt));
5153 *type_out = vectype1;
5154 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5156 return pattern_stmt;
5159 /* Now check for binary boolean operations requiring conversion for
5160 one of the operands. */
5161 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5162 return NULL;
5164 if (rhs_code != BIT_IOR_EXPR
5165 && rhs_code != BIT_XOR_EXPR
5166 && rhs_code != BIT_AND_EXPR
5167 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
5168 return NULL;
5170 rhs2 = gimple_assign_rhs2 (last_stmt);
5172 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5173 rhs2_type = integer_type_for_mask (rhs2, vinfo);
5175 if (!rhs1_type || !rhs2_type
5176 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
5177 return NULL;
5179 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
5181 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5182 if (!vectype1)
5183 return NULL;
5184 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
5186 else
5188 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
5189 if (!vectype1)
5190 return NULL;
5191 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5194 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5195 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
5197 *type_out = vectype1;
5198 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5200 return pattern_stmt;
5203 /* STMT_INFO is a load or store. If the load or store is conditional, return
5204 the boolean condition under which it occurs, otherwise return null. */
5206 static tree
5207 vect_get_load_store_mask (stmt_vec_info stmt_info)
5209 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
5211 gcc_assert (gimple_assign_single_p (def_assign));
5212 return NULL_TREE;
5215 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
5217 internal_fn ifn = gimple_call_internal_fn (def_call);
5218 int mask_index = internal_fn_mask_index (ifn);
5219 return gimple_call_arg (def_call, mask_index);
5222 gcc_unreachable ();
5225 /* Return MASK if MASK is suitable for masking an operation on vectors
5226 of type VECTYPE, otherwise convert it into such a form and return
5227 the result. Associate any conversion statements with STMT_INFO's
5228 pattern. */
5230 static tree
5231 vect_convert_mask_for_vectype (tree mask, tree vectype,
5232 stmt_vec_info stmt_info, vec_info *vinfo)
5234 tree mask_type = integer_type_for_mask (mask, vinfo);
5235 if (mask_type)
5237 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
5238 if (mask_vectype
5239 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
5240 TYPE_VECTOR_SUBPARTS (mask_vectype)))
5241 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
5243 return mask;
5246 /* Return the equivalent of:
5248 fold_convert (TYPE, VALUE)
5250 with the expectation that the operation will be vectorized.
5251 If new statements are needed, add them as pattern statements
5252 to STMT_INFO. */
5254 static tree
5255 vect_add_conversion_to_pattern (vec_info *vinfo,
5256 tree type, tree value, stmt_vec_info stmt_info)
5258 if (useless_type_conversion_p (type, TREE_TYPE (value)))
5259 return value;
5261 tree new_value = vect_recog_temp_ssa_var (type, NULL);
5262 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
5263 append_pattern_def_seq (vinfo, stmt_info, conversion,
5264 get_vectype_for_scalar_type (vinfo, type));
5265 return new_value;
5268 /* Try to convert STMT_INFO into a call to a gather load or scatter store
5269 internal function. Return the final statement on success and set
5270 *TYPE_OUT to the vector type being loaded or stored.
5272 This function only handles gathers and scatters that were recognized
5273 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
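/* As a sketch of the kind of access involved (array names below are
   purely illustrative):

     for (int i = 0; i < n; ++i)
       dst[i] = base[idx[i]];      // gather load

     for (int i = 0; i < n; ++i)
       base[idx[i]] = src[i];      // scatter store

   Assuming the target provides suitable instructions, these become calls
   to internal functions such as IFN_GATHER_LOAD and IFN_SCATTER_STORE,
   or the IFN_MASK_* variants when the access is conditional.  */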
5275 static gimple *
5276 vect_recog_gather_scatter_pattern (vec_info *vinfo,
5277 stmt_vec_info stmt_info, tree *type_out)
5279 /* Currently we only support this for loop vectorization. */
5280 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5281 if (!loop_vinfo)
5282 return NULL;
5284 /* Make sure that we're looking at a gather load or scatter store. */
5285 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
5286 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5287 return NULL;
5289 /* Get the boolean that controls whether the load or store happens.
5290 This is null if the operation is unconditional. */
5291 tree mask = vect_get_load_store_mask (stmt_info);
5293 /* Make sure that the target supports an appropriate internal
5294 function for the gather/scatter operation. */
5295 gather_scatter_info gs_info;
5296 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
5297 || gs_info.ifn == IFN_LAST)
5298 return NULL;
5300 /* Convert the mask to the right form. */
5301 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
5302 gs_info.element_type);
5303 if (mask)
5304 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
5305 loop_vinfo);
5306 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
5307 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
5308 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
5310 /* Get the invariant base and non-invariant offset, converting the
5311 latter to the same width as the vector elements. */
5312 tree base = gs_info.base;
5313 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
5314 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
5315 gs_info.offset, stmt_info);
5317 /* Build the new pattern statement. */
5318 tree scale = size_int (gs_info.scale);
5319 gcall *pattern_stmt;
5320 if (DR_IS_READ (dr))
5322 tree zero = build_zero_cst (gs_info.element_type);
5323 if (mask != NULL)
5324 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
5325 offset, scale, zero, mask);
5326 else
5327 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
5328 offset, scale, zero);
5329 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
5330 gimple_call_set_lhs (pattern_stmt, load_lhs);
5332 else
5334 tree rhs = vect_get_store_rhs (stmt_info);
5335 if (mask != NULL)
5336 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
5337 base, offset, scale, rhs,
5338 mask);
5339 else
5340 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
5341 base, offset, scale, rhs);
5343 gimple_call_set_nothrow (pattern_stmt, true);
5345 /* Copy across relevant vectorization info and associate DR with the
5346 new pattern statement instead of the original statement. */
5347 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5348 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
5350 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5351 *type_out = vectype;
5352 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
5354 return pattern_stmt;
5357 /* Return true if TYPE is a non-boolean integer type. These are the types
5358 that we want to consider for narrowing. */
5360 static bool
5361 vect_narrowable_type_p (tree type)
5363 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5366 /* Return true if the operation given by CODE can be truncated to N bits
5367 when only N bits of the output are needed. This is only true if bit N+1
5368 of the inputs has no effect on the low N bits of the result. */
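/* As a plain C sketch of the property (assuming 8-bit unsigned char and
   arbitrary unsigned ints A and B):

     (unsigned char) (A + B)
       == (unsigned char) ((unsigned char) A + (unsigned char) B)

   holds for the codes accepted below, whereas e.g. a right shift or a
   division is not truncatable, because discarded high bits of the inputs
   can change the low bits of the result.  */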
5370 static bool
5371 vect_truncatable_operation_p (tree_code code)
5373 switch (code)
5375 case PLUS_EXPR:
5376 case MINUS_EXPR:
5377 case MULT_EXPR:
5378 case BIT_AND_EXPR:
5379 case BIT_IOR_EXPR:
5380 case BIT_XOR_EXPR:
5381 case COND_EXPR:
5382 return true;
5384 default:
5385 return false;
5389 /* Record that STMT_INFO could be changed from operating on TYPE to
5390 operating on a type with the precision and sign given by PRECISION
5391 and SIGN respectively. PRECISION is an arbitrary bit precision;
5392 it might not be a whole number of bytes. */
5394 static void
5395 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5396 unsigned int precision, signop sign)
5398 /* Round the precision up to a whole number of bytes. */
5399 precision = vect_element_precision (precision);
5400 if (precision < TYPE_PRECISION (type)
5401 && (!stmt_info->operation_precision
5402 || stmt_info->operation_precision > precision))
5404 stmt_info->operation_precision = precision;
5405 stmt_info->operation_sign = sign;
5409 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5410 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5411 is an arbitrary bit precision; it might not be a whole number of bytes. */
5413 static void
5414 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5415 unsigned int min_input_precision)
5417 /* This operation in isolation only requires the inputs to have
5418 MIN_INPUT_PRECISION of precision. However, that doesn't mean
5419 that MIN_INPUT_PRECISION is a natural precision for the chain
5420 as a whole. E.g. consider something like:
5422 unsigned short *x, *y;
5423 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5425 The right shift can be done on unsigned chars, and only requires the
5426 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5427 approach would mean turning a natural chain of single-vector unsigned
5428 short operations into one that truncates "*x" and then extends
5429 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5430 operation and one vector for each unsigned char operation.
5431 This would be a significant pessimization.
5433 Instead only propagate the maximum of this precision and the precision
5434 required by the users of the result. This means that we don't pessimize
5435 the case above but continue to optimize things like:
5437 unsigned char *y;
5438 unsigned short *x;
5439 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5441 Here we would truncate two vectors of *x to a single vector of
5442 unsigned chars and use single-vector unsigned char operations for
5443 everything else, rather than doing two unsigned short copies of
5444 "(*x & 0xf0) >> 4" and then truncating the result. */
5445 min_input_precision = MAX (min_input_precision,
5446 stmt_info->min_output_precision);
5448 if (min_input_precision < TYPE_PRECISION (type)
5449 && (!stmt_info->min_input_precision
5450 || stmt_info->min_input_precision > min_input_precision))
5451 stmt_info->min_input_precision = min_input_precision;
5454 /* Subroutine of vect_determine_min_output_precision. Return true if
5455 we can calculate a reduced number of output bits for STMT_INFO,
5456 whose result is LHS. */
5458 static bool
5459 vect_determine_min_output_precision_1 (vec_info *vinfo,
5460 stmt_vec_info stmt_info, tree lhs)
5462 /* Take the maximum precision required by users of the result. */
5463 unsigned int precision = 0;
5464 imm_use_iterator iter;
5465 use_operand_p use;
5466 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5468 gimple *use_stmt = USE_STMT (use);
5469 if (is_gimple_debug (use_stmt))
5470 continue;
5471 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5472 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5473 return false;
5474 /* The input precision recorded for COND_EXPRs applies only to the
5475 "then" and "else" values. */
5476 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5477 if (assign
5478 && gimple_assign_rhs_code (assign) == COND_EXPR
5479 && use->use != gimple_assign_rhs2_ptr (assign)
5480 && use->use != gimple_assign_rhs3_ptr (assign))
5481 return false;
5482 precision = MAX (precision, use_stmt_info->min_input_precision);
5485 if (dump_enabled_p ())
5486 dump_printf_loc (MSG_NOTE, vect_location,
5487 "only the low %d bits of %T are significant\n",
5488 precision, lhs);
5489 stmt_info->min_output_precision = precision;
5490 return true;
5493 /* Calculate min_output_precision for STMT_INFO. */
5495 static void
5496 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5498 /* We're only interested in statements with a narrowable result. */
5499 tree lhs = gimple_get_lhs (stmt_info->stmt);
5500 if (!lhs
5501 || TREE_CODE (lhs) != SSA_NAME
5502 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5503 return;
5505 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5506 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5509 /* Use range information to decide whether STMT (described by STMT_INFO)
5510 could be done in a narrower type. This is effectively a forward
5511 propagation, since it uses context-independent information that applies
5512 to all users of an SSA name. */
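/* A minimal sketch of the idea (types and ranges below are illustrative):

     unsigned int x = a & 0xff;      // known range [0, 255]
     unsigned int y = x + 3;         // known range [3, 258]

   Both results fit in far fewer than 32 bits, so the statements could be
   done on a narrower type; the precision is later rounded up to a whole
   number of bytes, giving an 8-bit AND and a 16-bit addition here.  */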
5514 static void
5515 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5517 tree lhs = gimple_assign_lhs (stmt);
5518 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5519 return;
5521 tree type = TREE_TYPE (lhs);
5522 if (!vect_narrowable_type_p (type))
5523 return;
5525 /* First see whether we have any useful range information for the result. */
5526 unsigned int precision = TYPE_PRECISION (type);
5527 signop sign = TYPE_SIGN (type);
5528 wide_int min_value, max_value;
5529 if (!vect_get_range_info (lhs, &min_value, &max_value))
5530 return;
5532 tree_code code = gimple_assign_rhs_code (stmt);
5533 unsigned int nops = gimple_num_ops (stmt);
5535 if (!vect_truncatable_operation_p (code))
5536 /* Check that all relevant input operands are compatible, and update
5537 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5538 for (unsigned int i = 1; i < nops; ++i)
5540 tree op = gimple_op (stmt, i);
5541 if (TREE_CODE (op) == INTEGER_CST)
5543 /* Don't require the integer to have RHS_TYPE (which it might
5544 not for things like shift amounts, etc.), but do require it
5545 to fit the type. */
5546 if (!int_fits_type_p (op, type))
5547 return;
5549 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5550 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5552 else if (TREE_CODE (op) == SSA_NAME)
5554 /* Ignore codes that don't take uniform arguments. */
5555 if (!types_compatible_p (TREE_TYPE (op), type))
5556 return;
5558 wide_int op_min_value, op_max_value;
5559 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5560 return;
5562 min_value = wi::min (min_value, op_min_value, sign);
5563 max_value = wi::max (max_value, op_max_value, sign);
5565 else
5566 return;
5569 /* Try to switch signed types for unsigned types if we can.
5570 This is better for two reasons. First, unsigned ops tend
5571 to be cheaper than signed ops. Second, it means that we can
5572 handle things like:
5574 signed char c;
5575 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
as:
5579 signed char c;
5580 unsigned short res_1 = (unsigned short) c & 0xff00;
5581 int res = (int) res_1;
5583 where the intermediate result res_1 has unsigned rather than
5584 signed type. */
5585 if (sign == SIGNED && !wi::neg_p (min_value))
5586 sign = UNSIGNED;
5588 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5589 unsigned int precision1 = wi::min_precision (min_value, sign);
5590 unsigned int precision2 = wi::min_precision (max_value, sign);
5591 unsigned int value_precision = MAX (precision1, precision2);
5592 if (value_precision >= precision)
5593 return;
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5597 " without loss of precision: %G",
5598 sign == SIGNED ? "signed" : "unsigned",
5599 value_precision, (gimple *) stmt);
5601 vect_set_operation_type (stmt_info, type, value_precision, sign);
5602 vect_set_min_input_precision (stmt_info, type, value_precision);
5605 /* Use information about the users of STMT's result to decide whether
5606 STMT (described by STMT_INFO) could be done in a narrower type.
5607 This is effectively a backward propagation. */
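/* For example (the types below are illustrative):

     unsigned int tmp = a + b;       // a, b unsigned int
     unsigned char res = tmp >> 1;

   The only user of tmp needs just its low 9 bits (8 output bits plus the
   shift amount), so the 32-bit addition could be narrowed to a 16-bit one
   regardless of any range information about a and b.  */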
5609 static void
5610 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5612 tree_code code = gimple_assign_rhs_code (stmt);
5613 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5614 tree type = TREE_TYPE (gimple_op (stmt, opno));
5615 if (!vect_narrowable_type_p (type))
5616 return;
5618 unsigned int precision = TYPE_PRECISION (type);
5619 unsigned int operation_precision, min_input_precision;
5620 switch (code)
5622 CASE_CONVERT:
5623 /* Only the bits that contribute to the output matter. Don't change
5624 the precision of the operation itself. */
5625 operation_precision = precision;
5626 min_input_precision = stmt_info->min_output_precision;
5627 break;
5629 case LSHIFT_EXPR:
5630 case RSHIFT_EXPR:
5632 tree shift = gimple_assign_rhs2 (stmt);
5633 if (TREE_CODE (shift) != INTEGER_CST
5634 || !wi::ltu_p (wi::to_widest (shift), precision))
5635 return;
5636 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5637 if (code == LSHIFT_EXPR)
5639 /* Avoid creating an undefined shift.
5641 ??? We could instead use min_output_precision as-is and
5642 optimize out-of-range shifts to zero. However, only
5643 degenerate testcases shift away all their useful input data,
5644 and it isn't natural to drop input operations in the middle
5645 of vectorization. This sort of thing should really be
5646 handled before vectorization. */
5647 operation_precision = MAX (stmt_info->min_output_precision,
5648 const_shift + 1);
5649 /* We need CONST_SHIFT fewer bits of the input. */
5650 min_input_precision = (MAX (operation_precision, const_shift)
5651 - const_shift);
5653 else
5655 /* We need CONST_SHIFT extra bits to do the operation. */
5656 operation_precision = (stmt_info->min_output_precision
5657 + const_shift);
5658 min_input_precision = operation_precision;
5660 break;
5663 default:
5664 if (vect_truncatable_operation_p (code))
5666 /* Input bit N has no effect on output bits N-1 and lower. */
5667 operation_precision = stmt_info->min_output_precision;
5668 min_input_precision = operation_precision;
5669 break;
5671 return;
5674 if (operation_precision < precision)
5676 if (dump_enabled_p ())
5677 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5678 " without affecting users: %G",
5679 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5680 operation_precision, (gimple *) stmt);
5681 vect_set_operation_type (stmt_info, type, operation_precision,
5682 TYPE_SIGN (type));
5684 vect_set_min_input_precision (stmt_info, type, min_input_precision);
5687 /* Return true if the statement described by STMT_INFO sets a boolean
5688 SSA_NAME and if we know how to vectorize this kind of statement using
5689 vector mask types. */
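/* For example (hypothetical GIMPLE), both of:

     _1 = a_4 < b_5;
     _2 = _1 & flag_6;       // flag_6 is also a boolean

   set boolean SSA_NAMEs using operations handled here, whereas e.g. a
   call producing a _Bool result is not recognized and yields false.  */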
5691 static bool
5692 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5694 tree lhs = gimple_get_lhs (stmt_info->stmt);
5695 if (!lhs
5696 || TREE_CODE (lhs) != SSA_NAME
5697 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5698 return false;
5700 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5702 tree_code rhs_code = gimple_assign_rhs_code (assign);
5703 switch (rhs_code)
5705 CASE_CONVERT:
5706 case SSA_NAME:
5707 case BIT_NOT_EXPR:
5708 case BIT_IOR_EXPR:
5709 case BIT_XOR_EXPR:
5710 case BIT_AND_EXPR:
5711 return true;
5713 default:
5714 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5717 else if (is_a <gphi *> (stmt_info->stmt))
5718 return true;
5719 return false;
5722 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5723 a vector mask type instead of a normal vector type. Record the
5724 result in STMT_INFO->mask_precision. */
5726 static void
5727 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5729 if (!possible_vector_mask_operation_p (stmt_info))
5730 return;
5732 /* If at least one boolean input uses a vector mask type,
5733 pick the mask type with the narrowest elements.
5735 ??? This is the traditional behavior. It should always produce
5736 the smallest number of operations, but isn't necessarily the
5737 optimal choice. For example, if we have:
5739 a = b & c
5741 where:
5743 - the user of a wants it to have a mask type for 16-bit elements (M16)
5744 - b also uses M16
5745 - c uses a mask type for 8-bit elements (M8)
5747 then picking M8 gives:
5749 - 1 M16->M8 pack for b
5750 - 1 M8 AND for a
5751 - 2 M8->M16 unpacks for the user of a
5753 whereas picking M16 would have given:
5755 - 2 M8->M16 unpacks for c
5756 - 2 M16 ANDs for a
5758 The number of operations is the same, but M16 would have given
5759 a shorter dependency chain and allowed more ILP. */
5760 unsigned int precision = ~0U;
5761 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5763 unsigned int nops = gimple_num_ops (assign);
5764 for (unsigned int i = 1; i < nops; ++i)
5766 tree rhs = gimple_op (assign, i);
5767 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5768 continue;
5770 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5771 if (!def_stmt_info)
5772 /* Don't let external or constant operands influence the choice.
5773 We can convert them to whichever vector type we pick. */
5774 continue;
5776 if (def_stmt_info->mask_precision)
5778 if (precision > def_stmt_info->mask_precision)
5779 precision = def_stmt_info->mask_precision;
5783 /* If the statement compares two values that shouldn't use vector masks,
5784 try comparing the values as normal scalars instead. */
5785 tree_code rhs_code = gimple_assign_rhs_code (assign);
5786 if (precision == ~0U
5787 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5789 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5790 scalar_mode mode;
5791 tree vectype, mask_type;
5792 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5793 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5794 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5795 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5796 precision = GET_MODE_BITSIZE (mode);
5799 else
5801 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5802 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5804 tree rhs = gimple_phi_arg_def (phi, i);
5806 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5807 if (!def_stmt_info)
5808 /* Don't let external or constant operands influence the choice.
5809 We can convert them to whichever vector type we pick. */
5810 continue;
5812 if (def_stmt_info->mask_precision)
5814 if (precision > def_stmt_info->mask_precision)
5815 precision = def_stmt_info->mask_precision;
5820 if (dump_enabled_p ())
5822 if (precision == ~0U)
5823 dump_printf_loc (MSG_NOTE, vect_location,
5824 "using normal nonmask vectors for %G",
5825 stmt_info->stmt);
5826 else
5827 dump_printf_loc (MSG_NOTE, vect_location,
5828 "using boolean precision %d for %G",
5829 precision, stmt_info->stmt);
5832 stmt_info->mask_precision = precision;
5835 /* Handle vect_determine_precisions for STMT_INFO, given that we
5836 have already done so for the users of its result. */
5838 void
5839 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5841 vect_determine_min_output_precision (vinfo, stmt_info);
5842 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5844 vect_determine_precisions_from_range (stmt_info, stmt);
5845 vect_determine_precisions_from_users (stmt_info, stmt);
5849 /* Walk backwards through the vectorizable region to determine the
5850 values of these fields:
5852 - min_output_precision
5853 - min_input_precision
5854 - operation_precision
5855 - operation_sign. */
5857 void
5858 vect_determine_precisions (vec_info *vinfo)
5860 DUMP_VECT_SCOPE ("vect_determine_precisions");
5862 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5864 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5865 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5866 unsigned int nbbs = loop->num_nodes;
5868 for (unsigned int i = 0; i < nbbs; i++)
5870 basic_block bb = bbs[i];
5871 for (auto gsi = gsi_start_phis (bb);
5872 !gsi_end_p (gsi); gsi_next (&gsi))
5874 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5875 if (stmt_info)
5876 vect_determine_mask_precision (vinfo, stmt_info);
5878 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5879 if (!is_gimple_debug (gsi_stmt (si)))
5880 vect_determine_mask_precision
5881 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5883 for (unsigned int i = 0; i < nbbs; i++)
5885 basic_block bb = bbs[nbbs - i - 1];
5886 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5887 !gsi_end_p (si); gsi_prev (&si))
5888 if (!is_gimple_debug (gsi_stmt (si)))
5889 vect_determine_stmt_precisions
5890 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5891 for (auto gsi = gsi_start_phis (bb);
5892 !gsi_end_p (gsi); gsi_next (&gsi))
5894 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5895 if (stmt_info)
5896 vect_determine_stmt_precisions (vinfo, stmt_info);
5900 else
5902 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5903 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5905 basic_block bb = bb_vinfo->bbs[i];
5906 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5908 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5909 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5910 vect_determine_mask_precision (vinfo, stmt_info);
5912 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5914 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5915 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5916 vect_determine_mask_precision (vinfo, stmt_info);
5919 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5921 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5922 !gsi_end_p (gsi); gsi_prev (&gsi))
5924 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5925 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5926 vect_determine_stmt_precisions (vinfo, stmt_info);
5928 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5929 !gsi_end_p (gsi); gsi_next (&gsi))
5931 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5932 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5933 vect_determine_stmt_precisions (vinfo, stmt_info);
5939 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5941 struct vect_recog_func
5943 vect_recog_func_ptr fn;
5944 const char *name;
5947 /* Note that ordering matters - the first pattern matching on a stmt is
5948 taken, which means the more complex pattern usually needs to precede the
5949 less complex ones (widen_sum only after dot_prod or sad, for example). */
5950 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5951 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
5952 { vect_recog_bit_insert_pattern, "bit_insert" },
5953 { vect_recog_over_widening_pattern, "over_widening" },
5954 /* Must come after over_widening, which narrows the shift as much as
5955 possible beforehand. */
5956 { vect_recog_average_pattern, "average" },
5957 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5958 { vect_recog_mulhs_pattern, "mult_high" },
5959 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5960 { vect_recog_widen_mult_pattern, "widen_mult" },
5961 { vect_recog_dot_prod_pattern, "dot_prod" },
5962 { vect_recog_sad_pattern, "sad" },
5963 { vect_recog_widen_sum_pattern, "widen_sum" },
5964 { vect_recog_pow_pattern, "pow" },
5965 { vect_recog_popcount_pattern, "popcount" },
5966 { vect_recog_widen_shift_pattern, "widen_shift" },
5967 { vect_recog_rotate_pattern, "rotate" },
5968 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5969 { vect_recog_divmod_pattern, "divmod" },
5970 { vect_recog_mult_pattern, "mult" },
5971 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5972 { vect_recog_bool_pattern, "bool" },
5973 /* This must come before mask conversion, and includes the parts
5974 of mask conversion that are needed for gather and scatter
5975 internal functions. */
5976 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5977 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5978 { vect_recog_widen_plus_pattern, "widen_plus" },
5979 { vect_recog_widen_minus_pattern, "widen_minus" },
5982 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5984 /* Mark statements that are involved in a pattern. */
5986 void
5987 vect_mark_pattern_stmts (vec_info *vinfo,
5988 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5989 tree pattern_vectype)
5991 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5992 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5994 gimple *orig_pattern_stmt = NULL;
5995 if (is_pattern_stmt_p (orig_stmt_info))
5997 /* We're replacing a statement in an existing pattern definition
5998 sequence. */
5999 orig_pattern_stmt = orig_stmt_info->stmt;
6000 if (dump_enabled_p ())
6001 dump_printf_loc (MSG_NOTE, vect_location,
6002 "replacing earlier pattern %G", orig_pattern_stmt);
6004 /* To keep the book-keeping simple, just swap the lhs of the
6005 old and new statements, so that the old one has a valid but
6006 unused lhs. */
6007 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
6008 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
6009 gimple_set_lhs (pattern_stmt, old_lhs);
6011 if (dump_enabled_p ())
6012 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
6014 /* Switch to the statement that ORIG replaces. */
6015 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
6017 /* We shouldn't be replacing the main pattern statement. */
6018 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
6019 != orig_pattern_stmt);
6022 if (def_seq)
6023 for (gimple_stmt_iterator si = gsi_start (def_seq);
6024 !gsi_end_p (si); gsi_next (&si))
6026 if (dump_enabled_p ())
6027 dump_printf_loc (MSG_NOTE, vect_location,
6028 "extra pattern stmt: %G", gsi_stmt (si));
6029 stmt_vec_info pattern_stmt_info
6030 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
6031 orig_stmt_info, pattern_vectype);
6032 /* Stmts in the def sequence are not vectorizable cycle or
6033 induction defs; instead they should all be vect_internal_def
6034 feeding the main pattern stmt, which retains this def type. */
6035 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
6038 if (orig_pattern_stmt)
6040 vect_init_pattern_stmt (vinfo, pattern_stmt,
6041 orig_stmt_info, pattern_vectype);
6043 /* Insert all the new pattern statements before the original one. */
6044 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6045 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
6046 orig_def_seq);
6047 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
6048 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
6050 /* Remove the pattern statement that this new pattern replaces. */
6051 gsi_remove (&gsi, false);
6053 else
6054 vect_set_pattern_stmt (vinfo,
6055 pattern_stmt, orig_stmt_info, pattern_vectype);
6057 /* Transfer reduction path info to the pattern. */
6058 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
6060 gimple_match_op op;
6061 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
6062 gcc_unreachable ();
6063 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
6064 /* Search the pattern def sequence and the main pattern stmt. Note
6065 we may have inserted all of them into a containing pattern def sequence,
6066 so the following is a bit awkward. */
6067 gimple_stmt_iterator si;
6068 gimple *s;
6069 if (def_seq)
6071 si = gsi_start (def_seq);
6072 s = gsi_stmt (si);
6073 gsi_next (&si);
6075 else
6077 si = gsi_none ();
6078 s = pattern_stmt;
6082 bool found = false;
6083 if (gimple_extract_op (s, &op))
6084 for (unsigned i = 0; i < op.num_ops; ++i)
6085 if (op.ops[i] == lookfor)
6087 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
6088 lookfor = gimple_get_lhs (s);
6089 found = true;
6090 break;
6092 if (s == pattern_stmt)
6094 if (!found && dump_enabled_p ())
6095 dump_printf_loc (MSG_NOTE, vect_location,
6096 "failed to update reduction index.\n");
6097 break;
6099 if (gsi_end_p (si))
6100 s = pattern_stmt;
6101 else
6103 s = gsi_stmt (si);
6104 if (s == pattern_stmt)
6105 /* Found the end inside a bigger pattern def seq. */
6106 si = gsi_none ();
6107 else
6108 gsi_next (&si);
6110 } while (1);
6114 /* Function vect_pattern_recog_1
6116 Input:
6117 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
6118 computation pattern.
6119 STMT_INFO: A stmt from which the pattern search should start.
6121 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
6122 a sequence of statements that has the same functionality and can be
6123 used to replace STMT_INFO. It returns the last statement in the sequence
6124 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
6125 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
6126 statement, having first checked that the target supports the new operation
6127 in that type.
6129 This function also does some bookkeeping, as explained in the documentation
6130 for vect_pattern_recog. */
6132 static void
6133 vect_pattern_recog_1 (vec_info *vinfo,
6134 vect_recog_func *recog_func, stmt_vec_info stmt_info)
6136 gimple *pattern_stmt;
6137 loop_vec_info loop_vinfo;
6138 tree pattern_vectype;
6140 /* If this statement has already been replaced with pattern statements,
6141 leave the original statement alone, since the first match wins.
6142 Instead try to match against the definition statements that feed
6143 the main pattern statement. */
6144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6146 gimple_stmt_iterator gsi;
6147 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6148 !gsi_end_p (gsi); gsi_next (&gsi))
6149 vect_pattern_recog_1 (vinfo, recog_func,
6150 vinfo->lookup_stmt (gsi_stmt (gsi)));
6151 return;
6154 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6155 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
6156 if (!pattern_stmt)
6158 /* Clear any half-formed pattern definition sequence. */
6159 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
6160 return;
6163 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6165 /* Found a vectorizable pattern. */
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE, vect_location,
6168 "%s pattern recognized: %G",
6169 recog_func->name, pattern_stmt);
6171 /* Mark the stmts that are involved in the pattern. */
6172 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
6174 /* Patterns cannot be vectorized using SLP, because they change the order of
6175 computation. */
6176 if (loop_vinfo)
6178 unsigned ix, ix2;
6179 stmt_vec_info *elem_ptr;
6180 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
6181 elem_ptr, *elem_ptr == stmt_info);
6186 /* Function vect_pattern_recog
6188 Input:
6189 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
6190 computation idioms.
6192 Output - for each computation idiom that is detected we create a new stmt
6193 that provides the same functionality and that can be vectorized. We
6194 also record some information in the stmt_vec_info of the relevant
6195 stmts, as explained below:
6197 At the entry to this function we have the following stmts, with the
6198 following initial value in the STMT_VINFO fields:
6200 stmt in_pattern_p related_stmt vec_stmt
6201 S1: a_i = .... - - -
6202 S2: a_2 = ..use(a_i).. - - -
6203 S3: a_1 = ..use(a_2).. - - -
6204 S4: a_0 = ..use(a_1).. - - -
6205 S5: ... = ..use(a_0).. - - -
6207 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
6208 represented by a single stmt. We then:
6209 - create a new stmt S6 equivalent to the pattern (the stmt is not
6210 inserted into the code)
6211 - fill in the STMT_VINFO fields as follows:
6213 in_pattern_p related_stmt vec_stmt
6214 S1: a_i = .... - - -
6215 S2: a_2 = ..use(a_i).. - - -
6216 S3: a_1 = ..use(a_2).. - - -
6217 S4: a_0 = ..use(a_1).. true S6 -
6218 '---> S6: a_new = .... - S4 -
6219 S5: ... = ..use(a_0).. - - -
6221 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
6222 to each other through the RELATED_STMT field).
6224 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
6225 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
6226 remain irrelevant unless used by stmts other than S4.
6228 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
6229 (because they are marked as irrelevant). It will vectorize S6, and record
6230 a pointer to the new vector stmt VS6 from S6 (as usual).
6231 S4 will be skipped, and S5 will be vectorized as usual:
6233 in_pattern_p related_stmt vec_stmt
6234 S1: a_i = .... - - -
6235 S2: a_2 = ..use(a_i).. - - -
6236 S3: a_1 = ..use(a_2).. - - -
6237 > VS6: va_new = .... - - -
6238 S4: a_0 = ..use(a_1).. true S6 VS6
6239 '---> S6: a_new = .... - S4 VS6
6240 > VS5: ... = ..vuse(va_new).. - - -
6241 S5: ... = ..use(a_0).. - - -
6243 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
6244 elsewhere), and we'll end up with:
6246 VS6: va_new = ....
6247 VS5: ... = ..vuse(va_new)..
6249 In case of more than one pattern statement, e.g., widen-mult with
6250 intermediate type:
6252 S1 a_t = ;
6253 S2 a_T = (TYPE) a_t;
6254 '--> S3: a_it = (interm_type) a_t;
6255 S4 prod_T = a_T * CONST;
6256 '--> S5: prod_T' = a_it w* CONST;
6258 there may be other users of a_T outside the pattern. In that case S2 will
6259 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
6260 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
6261 be recorded in S3. */
6263 void
6264 vect_pattern_recog (vec_info *vinfo)
6266 class loop *loop;
6267 basic_block *bbs;
6268 unsigned int nbbs;
6269 gimple_stmt_iterator si;
6270 unsigned int i, j;
6272 vect_determine_precisions (vinfo);
6274 DUMP_VECT_SCOPE ("vect_pattern_recog");
6276 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6278 loop = LOOP_VINFO_LOOP (loop_vinfo);
6279 bbs = LOOP_VINFO_BBS (loop_vinfo);
6280 nbbs = loop->num_nodes;
6282 /* Scan through the loop stmts, applying the pattern recognition
6283 functions starting at each stmt visited: */
6284 for (i = 0; i < nbbs; i++)
6286 basic_block bb = bbs[i];
6287 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6289 if (is_gimple_debug (gsi_stmt (si)))
6290 continue;
6291 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
6292 /* Scan over all generic vect_recog_xxx_pattern functions. */
6293 for (j = 0; j < NUM_PATTERNS; j++)
6294 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
6295 stmt_info);
6299 else
6301 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6302 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6303 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
6304 !gsi_end_p (gsi); gsi_next (&gsi))
6306 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
6307 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
6308 continue;
6310 /* Scan over all generic vect_recog_xxx_pattern functions. */
6311 for (j = 0; j < NUM_PATTERNS; j++)
6312 vect_pattern_recog_1 (vinfo,
6313 &vect_vect_recog_func_ptrs[j], stmt_info);
6317 /* After this no more add_stmt calls are allowed. */
6318 vinfo->stmt_vec_info_ro = true;