re PR tree-optimization/89007 ([SVE] Implement generic vector average expansion)
gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
58 value_range_kind vr_type = get_range_info (var, min_value, max_value);
59 wide_int nonzero = get_nonzero_bits (var);
60 signop sgn = TYPE_SIGN (TREE_TYPE (var));
61 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 nonzero, sgn) == VR_RANGE)
64 if (dump_enabled_p ())
66 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 dump_printf (MSG_NOTE, " has range [");
68 dump_hex (MSG_NOTE, *min_value);
69 dump_printf (MSG_NOTE, ", ");
70 dump_hex (MSG_NOTE, *max_value);
71 dump_printf (MSG_NOTE, "]\n");
73 return true;
75 else
77 if (dump_enabled_p ())
79 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 dump_printf (MSG_NOTE, " has no range info\n");
82 return false;
86 /* Report that we've found an instance of pattern PATTERN in
87 statement STMT. */
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
92 if (dump_enabled_p ())
93 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97 return the pattern statement's stmt_vec_info. Set its vector type to
98 VECTYPE if it doesn't have one already. */
100 static stmt_vec_info
101 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
102 tree vectype)
104 vec_info *vinfo = orig_stmt_info->vinfo;
105 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
106 if (pattern_stmt_info == NULL)
107 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
108 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
110 pattern_stmt_info->pattern_stmt_p = true;
111 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
112 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
113 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
114 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
116 gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
117 == vect_use_mask_type_p (orig_stmt_info));
118 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
119 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
121 return pattern_stmt_info;
124 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
125 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
126 have one already. */
128 static void
129 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
130 tree vectype)
132 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
133 STMT_VINFO_RELATED_STMT (orig_stmt_info)
134 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
137 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
138 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
139 be different from the vector type of the final pattern statement.
140 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
141 from which it was derived. */
143 static inline void
144 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
145 tree vectype = NULL_TREE,
146 tree scalar_type_for_mask = NULL_TREE)
148 gcc_assert (!scalar_type_for_mask
149 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
150 vec_info *vinfo = stmt_info->vinfo;
151 if (vectype)
153 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
154 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
155 if (scalar_type_for_mask)
156 new_stmt_info->mask_precision
157 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
159 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
160 new_stmt);
163 /* The caller wants to perform new operations on vect_external variable
164 VAR, so that the result of the operations would also be vect_external.
165 Return the edge on which the operations can be performed, if one exists.
166 Return null if the operations should instead be treated as part of
167 the pattern that needs them. */
169 static edge
170 vect_get_external_def_edge (vec_info *vinfo, tree var)
172 edge e = NULL;
173 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
175 e = loop_preheader_edge (loop_vinfo->loop);
176 if (!SSA_NAME_IS_DEFAULT_DEF (var))
178 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
179 if (bb == NULL
180 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
181 e = NULL;
184 return e;
187 /* Return true if the target supports a vector version of CODE,
188 where CODE is known to map to a direct optab. ITYPE specifies
189 the type of (some of) the scalar inputs and OTYPE specifies the
190 type of the scalar result.
192 If CODE allows the inputs and outputs to have different types
193 (such as for WIDEN_SUM_EXPR), it is the input mode rather
194 than the output mode that determines the appropriate target pattern.
195 Operand 0 of the target pattern then specifies the mode that the output
196 must have.
198 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
199 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
200 is nonnull. */
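/* As an illustrative note (an assumption about usage, not text from the
   original sources): the dot-product pattern below calls this helper as

     vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
                                      type_out, &half_vectype)

   so the input mode is taken from HALF_TYPE (the narrow multiplication
   inputs) while operand 0 of the target pattern must match the vector
   form of TYPE (the accumulator).  */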
202 static bool
203 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
204 tree itype, tree *vecotype_out,
205 tree *vecitype_out = NULL)
207 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
208 if (!vecitype)
209 return false;
211 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
212 if (!vecotype)
213 return false;
215 optab optab = optab_for_tree_code (code, vecitype, optab_default);
216 if (!optab)
217 return false;
219 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
220 if (icode == CODE_FOR_nothing
221 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
222 return false;
224 *vecotype_out = vecotype;
225 if (vecitype_out)
226 *vecitype_out = vecitype;
227 return true;
230 /* Round bit precision PRECISION up to a full element. */
232 static unsigned int
233 vect_element_precision (unsigned int precision)
235 precision = 1 << ceil_log2 (precision);
236 return MAX (precision, BITS_PER_UNIT);
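/* Worked examples (illustrative only): vect_element_precision (3) == 8
   and vect_element_precision (9) == 16, assuming 8-bit BITS_PER_UNIT.  */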
239 /* If OP is defined by a statement that's being considered for vectorization,
240 return information about that statement, otherwise return NULL. */
242 static stmt_vec_info
243 vect_get_internal_def (vec_info *vinfo, tree op)
245 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
246 if (def_stmt_info
247 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
248 return def_stmt_info;
249 return NULL;
252 /* Check whether NAME, an ssa-name used in STMT_VINFO,
253 is a result of a type promotion, such that:
254 DEF_STMT: NAME = NOP (name0)
255 If CHECK_SIGN is TRUE, check that either both types are signed or both are
256 unsigned. */
258 static bool
259 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
260 tree *orig_type, gimple **def_stmt, bool *promotion)
262 tree type = TREE_TYPE (name);
263 tree oprnd0;
264 enum vect_def_type dt;
266 stmt_vec_info def_stmt_info;
267 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
268 def_stmt))
269 return false;
271 if (dt != vect_internal_def
272 && dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 if (!*def_stmt)
276 return false;
278 if (!is_gimple_assign (*def_stmt))
279 return false;
281 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
282 return false;
284 oprnd0 = gimple_assign_rhs1 (*def_stmt);
286 *orig_type = TREE_TYPE (oprnd0);
287 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
288 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
289 return false;
291 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
292 *promotion = true;
293 else
294 *promotion = false;
296 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
297 return false;
299 return true;
302 /* Holds information about an input operand after some sign changes
303 and type promotions have been peeled away. */
304 class vect_unpromoted_value {
305 public:
306 vect_unpromoted_value ();
308 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
310 /* The value obtained after peeling away zero or more casts. */
311 tree op;
313 /* The type of OP. */
314 tree type;
316 /* The definition type of OP. */
317 vect_def_type dt;
319 /* If OP is the result of peeling at least one cast, and if the cast
320 of OP itself is a vectorizable statement, CASTER identifies that
321 statement, otherwise it is null. */
322 stmt_vec_info caster;
325 inline vect_unpromoted_value::vect_unpromoted_value ()
326 : op (NULL_TREE),
327 type (NULL_TREE),
328 dt (vect_uninitialized_def),
329 caster (NULL)
333 /* Set the operand to OP_IN, its definition type to DT_IN, and the
334 statement that casts it to CASTER_IN. */
336 inline void
337 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
338 stmt_vec_info caster_in)
340 op = op_in;
341 type = TREE_TYPE (op);
342 dt = dt_in;
343 caster = caster_in;
346 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
347 to reach some vectorizable inner operand OP', continuing as long as it
348 is possible to convert OP' back to OP using a possible sign change
349 followed by a possible promotion P. Return this OP', or null if OP is
350 not a vectorizable SSA name. If there is a promotion P, describe its
351 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
352 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
353 have more than one user.
355 A successful return means that it is possible to go from OP' to OP
356 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
357 whereas the cast from UNPROM to OP might be a promotion, a sign
358 change, or a nop.
360 E.g. say we have:
362 signed short *ptr = ...;
363 signed short C = *ptr;
364 unsigned short B = (unsigned short) C; // sign change
365 signed int A = (signed int) B; // unsigned promotion
366 ...possible other uses of A...
367 unsigned int OP = (unsigned int) A; // sign change
369 In this case it's possible to go directly from C to OP using:
371 OP = (unsigned int) (unsigned short) C;
372 +------------+ +--------------+
373 promotion sign change
375 so OP' would be C. The input to the promotion is B, so UNPROM
376 would describe B. */
378 static tree
379 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
380 vect_unpromoted_value *unprom,
381 bool *single_use_p = NULL)
383 tree res = NULL_TREE;
384 tree op_type = TREE_TYPE (op);
385 unsigned int orig_precision = TYPE_PRECISION (op_type);
386 unsigned int min_precision = orig_precision;
387 stmt_vec_info caster = NULL;
388 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
390 /* See whether OP is simple enough to vectorize. */
391 stmt_vec_info def_stmt_info;
392 gimple *def_stmt;
393 vect_def_type dt;
394 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
395 break;
397 /* If OP is the input of a demotion, skip over it to see whether
398 OP is itself the result of a promotion. If so, the combined
399 effect of the promotion and the demotion might fit the required
400 pattern, otherwise neither operation fits.
402 This copes with cases such as the result of an arithmetic
403 operation being truncated before being stored, and where that
404 arithmetic operation has been recognized as an over-widened one. */
405 if (TYPE_PRECISION (op_type) <= min_precision)
407 /* Use OP as the UNPROM described above if we haven't yet
408 found a promotion, or if using the new input preserves the
409 sign of the previous promotion. */
410 if (!res
411 || TYPE_PRECISION (unprom->type) == orig_precision
412 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
414 unprom->set_op (op, dt, caster);
415 min_precision = TYPE_PRECISION (op_type);
417 /* Stop if we've already seen a promotion and if this
418 conversion does more than change the sign. */
419 else if (TYPE_PRECISION (op_type)
420 != TYPE_PRECISION (unprom->type))
421 break;
423 /* The sequence now extends to OP. */
424 res = op;
427 /* See whether OP is defined by a cast. Record it as CASTER if
428 the cast is potentially vectorizable. */
429 if (!def_stmt)
430 break;
431 caster = def_stmt_info;
433 /* Ignore pattern statements, since we don't link uses for them. */
434 if (caster
435 && single_use_p
436 && !STMT_VINFO_RELATED_STMT (caster)
437 && !has_single_use (res))
438 *single_use_p = false;
440 gassign *assign = dyn_cast <gassign *> (def_stmt);
441 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
442 break;
444 /* Continue with the input to the cast. */
445 op = gimple_assign_rhs1 (def_stmt);
446 op_type = TREE_TYPE (op);
448 return res;
451 /* OP is an integer operand to an operation that returns TYPE, and we
452 want to treat the operation as a widening one. So far we can treat
453 it as widening from *COMMON_TYPE.
455 Return true if OP is suitable for such a widening operation,
456 either widening from *COMMON_TYPE or from some supertype of it.
457 Update *COMMON_TYPE to the supertype in the latter case.
459 SHIFT_P is true if OP is a shift amount. */
461 static bool
462 vect_joust_widened_integer (tree type, bool shift_p, tree op,
463 tree *common_type)
465 /* Calculate the minimum precision required by OP, without changing
466 the sign of either operand. */
467 unsigned int precision;
468 if (shift_p)
470 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
471 return false;
472 precision = TREE_INT_CST_LOW (op);
474 else
476 precision = wi::min_precision (wi::to_widest (op),
477 TYPE_SIGN (*common_type));
478 if (precision * 2 > TYPE_PRECISION (type))
479 return false;
482 /* If OP requires a wider type, switch to that type. The checks
483 above ensure that this is still narrower than the result. */
484 precision = vect_element_precision (precision);
485 if (TYPE_PRECISION (*common_type) < precision)
486 *common_type = build_nonstandard_integer_type
487 (precision, TYPE_UNSIGNED (*common_type));
488 return true;
491 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
492 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
494 static bool
495 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
497 if (types_compatible_p (*common_type, new_type))
498 return true;
500 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
501 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
502 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
503 return true;
505 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
506 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
507 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
509 *common_type = new_type;
510 return true;
513 /* We have mismatched signs, with the signed type being
514 no wider than the unsigned type. In this case we need
515 a wider signed type. */
516 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
517 TYPE_PRECISION (new_type));
518 precision *= 2;
519 if (precision * 2 > TYPE_PRECISION (type))
520 return false;
522 *common_type = build_nonstandard_integer_type (precision, false);
523 return true;
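/* A worked example (illustrative, not from the original sources): with
   TYPE == int, *COMMON_TYPE == unsigned char and NEW_TYPE == signed char,
   neither 8-bit type can hold all values of the other, so we reach the
   mismatched-signs case: precision becomes MAX (8, 8) * 2 == 16, the
   bail-out test 16 * 2 > 32 is false, and *COMMON_TYPE is set to a signed
   16-bit type, which can represent the values of both operands.  */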
526 /* Check whether STMT_INFO can be viewed as a tree of integer operations
527 in which each node either performs CODE or WIDENED_CODE, and where
528 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
529 specifies the maximum number of leaf operands. SHIFT_P says whether
530 CODE and WIDENED_CODE are some sort of shift.
532 If STMT_INFO is such a tree, return the number of leaf operands
533 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
534 to a type that (a) is narrower than the result of STMT_INFO and
535 (b) can hold all leaf operand values.
537 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
538 exists. */
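/* An illustrative sketch of the simplest use (see the dot-product and SAD
   patterns below).  Given the scalar code

     unsigned char a, b;
     int prod = (int) a * (int) b;

   calling this function on the multiplication with CODE == MULT_EXPR,
   WIDENED_CODE == WIDEN_MULT_EXPR and MAX_NOPS == 2 returns 2, with
   UNPROM[0] and UNPROM[1] describing the unpromoted 8-bit values A and B
   and *COMMON_TYPE set to their common 8-bit type.  */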
540 static unsigned int
541 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
542 tree_code widened_code, bool shift_p,
543 unsigned int max_nops,
544 vect_unpromoted_value *unprom, tree *common_type)
546 /* Check for an integer operation with the right code. */
547 vec_info *vinfo = stmt_info->vinfo;
548 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
549 if (!assign)
550 return 0;
552 tree_code rhs_code = gimple_assign_rhs_code (assign);
553 if (rhs_code != code && rhs_code != widened_code)
554 return 0;
556 tree type = gimple_expr_type (assign);
557 if (!INTEGRAL_TYPE_P (type))
558 return 0;
560 /* Assume that both operands will be leaf operands. */
561 max_nops -= 2;
563 /* Check the operands. */
564 unsigned int next_op = 0;
565 for (unsigned int i = 0; i < 2; ++i)
567 vect_unpromoted_value *this_unprom = &unprom[next_op];
568 unsigned int nops = 1;
569 tree op = gimple_op (assign, i + 1);
570 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
572 /* We already have a common type from earlier operands.
573 Update it to account for OP. */
574 this_unprom->set_op (op, vect_constant_def);
575 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
576 return 0;
578 else
580 /* Only allow shifts by constants. */
581 if (shift_p && i == 1)
582 return 0;
584 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
585 this_unprom))
586 return 0;
588 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
590 /* The operand isn't widened. If STMT_INFO has the code
591 for an unwidened operation, recursively check whether
592 this operand is a node of the tree. */
593 if (rhs_code != code
594 || max_nops == 0
595 || this_unprom->dt != vect_internal_def)
596 return 0;
598 /* Give back the leaf slot allocated above now that we're
599 not treating this as a leaf operand. */
600 max_nops += 1;
602 /* Recursively process the definition of the operand. */
603 stmt_vec_info def_stmt_info
604 = vinfo->lookup_def (this_unprom->op);
605 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
606 shift_p, max_nops, this_unprom,
607 common_type);
608 if (nops == 0)
609 return 0;
611 max_nops -= nops;
613 else
615 /* Make sure that the operand is narrower than the result. */
616 if (TYPE_PRECISION (this_unprom->type) * 2
617 > TYPE_PRECISION (type))
618 return 0;
620 /* Update COMMON_TYPE for the new operand. */
621 if (i == 0)
622 *common_type = this_unprom->type;
623 else if (!vect_joust_widened_type (type, this_unprom->type,
624 common_type))
625 return 0;
628 next_op += nops;
630 return next_op;
633 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
634 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
636 static tree
637 vect_recog_temp_ssa_var (tree type, gimple *stmt)
639 return make_temp_ssa_name (type, stmt, "patt");
642 /* STMT2_INFO describes a type conversion that could be split into STMT1
643 followed by a version of STMT2_INFO that takes NEW_RHS as its first
644 input. Try to do this using pattern statements, returning true on
645 success. */
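/* An illustrative sketch (hypothetical names, not from the original
   sources): if STMT2_INFO is the conversion

     int a = (int) c;          // c has type char

   and a 16-bit intermediate result is wanted, STMT1 could be

     short patt_tmp = (short) c;

   with NEW_RHS == patt_tmp, so that STMT2_INFO effectively becomes
   "int a = (int) patt_tmp".  vect_convert_input below uses this to tap
   into a mid-way point of an existing cast.  */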
647 static bool
648 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
649 gimple *stmt1, tree vectype)
651 vec_info *vinfo = stmt2_info->vinfo;
652 if (is_pattern_stmt_p (stmt2_info))
654 /* STMT2_INFO is part of a pattern. Get the statement to which
655 the pattern is attached. */
656 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
657 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location,
661 "Splitting pattern statement: %G", stmt2_info->stmt);
663 /* Since STMT2_INFO is a pattern statement, we can change it
664 in-situ without worrying about changing the code for the
665 containing block. */
666 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
668 if (dump_enabled_p ())
670 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
671 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
672 stmt2_info->stmt);
675 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
676 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
677 /* STMT2_INFO is the actual pattern statement. Add STMT1
678 to the end of the definition sequence. */
679 gimple_seq_add_stmt_without_update (def_seq, stmt1);
680 else
682 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
683 before it. */
684 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
685 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
687 return true;
689 else
691 /* STMT2_INFO doesn't yet have a pattern. Try to create a
692 two-statement pattern now. */
693 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
694 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
695 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
696 if (!lhs_vectype)
697 return false;
699 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE, vect_location,
701 "Splitting statement: %G", stmt2_info->stmt);
703 /* Add STMT1 as a singleton pattern definition sequence. */
704 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
705 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
706 gimple_seq_add_stmt_without_update (def_seq, stmt1);
708 /* Build the second of the two pattern statements. */
709 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
710 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
711 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
713 if (dump_enabled_p ())
715 dump_printf_loc (MSG_NOTE, vect_location,
716 "into pattern statements: %G", stmt1);
717 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
720 return true;
724 /* Convert UNPROM to TYPE and return the result, adding new statements
725 to STMT_INFO's pattern definition statements if no better way is
726 available. VECTYPE is the vector form of TYPE. */
728 static tree
729 vect_convert_input (stmt_vec_info stmt_info, tree type,
730 vect_unpromoted_value *unprom, tree vectype)
732 vec_info *vinfo = stmt_info->vinfo;
734 /* Check for a no-op conversion. */
735 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
736 return unprom->op;
738 /* Allow the caller to create constant vect_unpromoted_values. */
739 if (TREE_CODE (unprom->op) == INTEGER_CST)
740 return wide_int_to_tree (type, wi::to_widest (unprom->op));
742 tree input = unprom->op;
743 if (unprom->caster)
745 tree lhs = gimple_get_lhs (unprom->caster->stmt);
746 tree lhs_type = TREE_TYPE (lhs);
748 /* If the result of the existing cast is the right width, use it
749 instead of the source of the cast. */
750 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
751 input = lhs;
752 /* If the precision we want is between the source and result
753 precisions of the existing cast, try splitting the cast into
754 two and tapping into a mid-way point. */
755 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
756 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
758 /* In order to preserve the semantics of the original cast,
759 give the mid-way point the same signedness as the input value.
761 It would be possible to use a signed type here instead if
762 TYPE is signed and UNPROM->TYPE is unsigned, but that would
763 make the sign of the midtype sensitive to the order in
764 which we process the statements, since the signedness of
765 TYPE is the signedness required by just one of possibly
766 many users. Also, unsigned promotions are usually as cheap
767 as or cheaper than signed ones, so it's better to keep an
768 unsigned promotion. */
769 tree midtype = build_nonstandard_integer_type
770 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
771 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
772 if (vec_midtype)
774 input = vect_recog_temp_ssa_var (midtype, NULL);
775 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
776 unprom->op);
777 if (!vect_split_statement (unprom->caster, input, new_stmt,
778 vec_midtype))
779 append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
783 /* See if we can reuse an existing result. */
784 if (types_compatible_p (type, TREE_TYPE (input)))
785 return input;
788 /* We need a new conversion statement. */
789 tree new_op = vect_recog_temp_ssa_var (type, NULL);
790 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
792 /* If OP is an external value, see if we can insert the new statement
793 on an incoming edge. */
794 if (input == unprom->op && unprom->dt == vect_external_def)
795 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
797 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
798 gcc_assert (!new_bb);
799 return new_op;
802 /* As a (common) last resort, add the statement to the pattern itself. */
803 append_pattern_def_seq (stmt_info, new_stmt, vectype);
804 return new_op;
807 /* Invoke vect_convert_input for N elements of UNPROM and store the
808 result in the corresponding elements of RESULT. */
810 static void
811 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
812 tree *result, tree type, vect_unpromoted_value *unprom,
813 tree vectype)
815 for (unsigned int i = 0; i < n; ++i)
817 unsigned int j;
818 for (j = 0; j < i; ++j)
819 if (unprom[j].op == unprom[i].op)
820 break;
821 if (j < i)
822 result[i] = result[j];
823 else
824 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
828 /* The caller has created a (possibly empty) sequence of pattern definition
829 statements followed by a single statement PATTERN_STMT. Cast the result
830 of this final statement to TYPE. If a new statement is needed, add
831 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
832 and return the new statement, otherwise return PATTERN_STMT as-is.
833 VECITYPE is the vector form of PATTERN_STMT's result type. */
835 static gimple *
836 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
837 tree vecitype)
839 tree lhs = gimple_get_lhs (pattern_stmt);
840 if (!types_compatible_p (type, TREE_TYPE (lhs)))
842 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
843 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
844 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
846 return pattern_stmt;
849 /* Return true if STMT_VINFO describes a reduction for which reassociation
850 is allowed. If STMT_INFO is part of a group, assume that it's part of
851 a reduction chain and optimistically assume that all statements
852 except the last allow reassociation.
853 Also require it to have code CODE and to be a reduction
854 in the outermost loop. When returning true, store the operands in
855 *OP0_OUT and *OP1_OUT. */
857 static bool
858 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
859 tree *op0_out, tree *op1_out)
861 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
862 if (!loop_info)
863 return false;
865 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
866 if (!assign || gimple_assign_rhs_code (assign) != code)
867 return false;
869 /* We don't allow changing the order of the computation in the inner-loop
870 when doing outer-loop vectorization. */
871 class loop *loop = LOOP_VINFO_LOOP (loop_info);
872 if (loop && nested_in_vect_loop_p (loop, stmt_info))
873 return false;
875 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
877 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
878 code))
879 return false;
881 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
882 return false;
884 *op0_out = gimple_assign_rhs1 (assign);
885 *op1_out = gimple_assign_rhs2 (assign);
886 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
887 std::swap (*op0_out, *op1_out);
888 return true;
891 /* Function vect_recog_dot_prod_pattern
893 Try to find the following pattern:
895 type x_t, y_t;
896 TYPE1 prod;
897 TYPE2 sum = init;
898 loop:
899 sum_0 = phi <init, sum_1>
900 S1 x_t = ...
901 S2 y_t = ...
902 S3 x_T = (TYPE1) x_t;
903 S4 y_T = (TYPE1) y_t;
904 S5 prod = x_T * y_T;
905 [S6 prod = (TYPE2) prod; #optional]
906 S7 sum_1 = prod + sum_0;
908 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
909 same size as 'TYPE1' or bigger. This is a special case of a reduction
910 computation.
912 Input:
914 * STMT_VINFO: The stmt from which the pattern search begins. In the
915 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
916 will be detected.
918 Output:
920 * TYPE_OUT: The type of the output of this pattern.
922 * Return value: A new stmt that will be used to replace the sequence of
923 stmts that constitute the pattern. In this case it will be:
924 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
926 Note: The dot-prod idiom is a widening reduction pattern that is
927 vectorized without preserving all the intermediate results. It
928 produces only N/2 (widened) results (by summing up pairs of
929 intermediate results) rather than all N results. Therefore, we
930 cannot allow this pattern when we want to get all the results and in
931 the correct order (as is the case when this computation is in an
932 inner-loop nested in an outer-loop that is being vectorized). */
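/* An illustrative example of a source loop that matches this pattern
   (a sketch, not taken from the original sources):

     short x[256], y[256];

     int
     dot_prod (void)
     {
       int sum = 0;
       for (int i = 0; i < 256; i++)
         sum += x[i] * y[i];    /* S3-S7: widen, multiply, accumulate.  */
       return sum;
     }

   The 16-bit elements are multiplied in 'int' after the usual promotions,
   and the accumulation becomes DOT_PROD_EXPR <x_t, y_t, sum_0> when the
   target provides a dot-product optab for vectors of 'short'.  */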
934 static gimple *
935 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
937 tree oprnd0, oprnd1;
938 gimple *last_stmt = stmt_vinfo->stmt;
939 vec_info *vinfo = stmt_vinfo->vinfo;
940 tree type, half_type;
941 gimple *pattern_stmt;
942 tree var;
944 /* Look for the following pattern
945 DX = (TYPE1) X;
946 DY = (TYPE1) Y;
947 DPROD = DX * DY;
948 DDPROD = (TYPE2) DPROD;
949 sum_1 = DDPROD + sum_0;
950 In which
951 - DX is double the size of X
952 - DY is double the size of Y
953 - DX, DY, DPROD all have the same type
954 - sum is the same size as DPROD or bigger
955 - sum has been recognized as a reduction variable.
957 This is equivalent to:
958 DPROD = X w* Y; #widen mult
959 sum_1 = DPROD w+ sum_0; #widen summation
961 DPROD = X w* Y; #widen mult
962 sum_1 = DPROD + sum_0; #summation
965 /* Starting from LAST_STMT, follow the defs of its uses in search
966 of the above pattern. */
968 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
969 &oprnd0, &oprnd1))
970 return NULL;
972 type = gimple_expr_type (last_stmt);
974 vect_unpromoted_value unprom_mult;
975 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
977 /* So far so good. Since last_stmt was detected as a (summation) reduction,
978 we know that oprnd1 is the reduction variable (defined by a loop-header
979 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
980 Left to check that oprnd0 is defined by a (widen_)mult_expr */
981 if (!oprnd0)
982 return NULL;
984 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
985 if (!mult_vinfo)
986 return NULL;
988 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
989 inside the loop (in case we are analyzing an outer-loop). */
990 vect_unpromoted_value unprom0[2];
991 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
992 false, 2, unprom0, &half_type))
993 return NULL;
995 /* If there are two widening operations, make sure they agree on
996 the sign of the extension. */
997 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
998 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
999 return NULL;
1001 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1003 tree half_vectype;
1004 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1005 type_out, &half_vectype))
1006 return NULL;
1008 /* Get the inputs in the appropriate types. */
1009 tree mult_oprnd[2];
1010 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
1011 unprom0, half_vectype);
1013 var = vect_recog_temp_ssa_var (type, NULL);
1014 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1015 mult_oprnd[0], mult_oprnd[1], oprnd1);
1017 return pattern_stmt;
1021 /* Function vect_recog_sad_pattern
1023 Try to find the following Sum of Absolute Difference (SAD) pattern:
1025 type x_t, y_t;
1026 signed TYPE1 diff, abs_diff;
1027 TYPE2 sum = init;
1028 loop:
1029 sum_0 = phi <init, sum_1>
1030 S1 x_t = ...
1031 S2 y_t = ...
1032 S3 x_T = (TYPE1) x_t;
1033 S4 y_T = (TYPE1) y_t;
1034 S5 diff = x_T - y_T;
1035 S6 abs_diff = ABS_EXPR <diff>;
1036 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1037 S8 sum_1 = abs_diff + sum_0;
1039 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1040 same size as 'TYPE1' or bigger. This is a special case of a reduction
1041 computation.
1043 Input:
1045 * STMT_VINFO: The stmt from which the pattern search begins. In the
1046 example, when this function is called with S8, the pattern
1047 {S3,S4,S5,S6,S7,S8} will be detected.
1049 Output:
1051 * TYPE_OUT: The type of the output of this pattern.
1053 * Return value: A new stmt that will be used to replace the sequence of
1054 stmts that constitute the pattern. In this case it will be:
1055 SAD_EXPR <x_t, y_t, sum_0>
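/* An illustrative example of a source loop that matches this pattern
   (a sketch, not taken from the original sources):

     unsigned char x[256], y[256];

     int
     sad (void)
     {
       int sum = 0;
       for (int i = 0; i < 256; i++)
         sum += __builtin_abs (x[i] - y[i]);    /* S5, S6 and S8 above.  */
       return sum;
     }

   The subtraction happens in 'int' after promotion, its absolute value is
   taken, and the accumulation becomes SAD_EXPR <x_t, y_t, sum_0> when the
   target has a SAD optab for vectors of 'unsigned char'.  */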
1058 static gimple *
1059 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1061 gimple *last_stmt = stmt_vinfo->stmt;
1062 vec_info *vinfo = stmt_vinfo->vinfo;
1063 tree half_type;
1065 /* Look for the following pattern
1066 DX = (TYPE1) X;
1067 DY = (TYPE1) Y;
1068 DDIFF = DX - DY;
1069 DAD = ABS_EXPR <DDIFF>;
1070 DAD = (TYPE2) DAD;
1071 sum_1 = DAD + sum_0;
1072 In which
1073 - DX is at least double the size of X
1074 - DY is at least double the size of Y
1075 - DX, DY, DDIFF, DAD all have the same type
1076 - sum is the same size as DAD or bigger
1077 - sum has been recognized as a reduction variable.
1079 This is equivalent to:
1080 DDIFF = X w- Y; #widen sub
1081 DAD = ABS_EXPR <DDIFF>;
1082 sum_1 = DAD w+ sum_0; #widen summation
1084 DDIFF = X w- Y; #widen sub
1085 DAD = ABS_EXPR <DDIFF>;
1086 sum_1 = DAD + sum_0; #summation
1089 /* Starting from LAST_STMT, follow the defs of its uses in search
1090 of the above pattern. */
1092 tree plus_oprnd0, plus_oprnd1;
1093 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1094 &plus_oprnd0, &plus_oprnd1))
1095 return NULL;
1097 tree sum_type = gimple_expr_type (last_stmt);
1099 /* Any non-truncating sequence of conversions is OK here, since
1100 with a successful match, the result of the ABS(U) is known to fit
1101 within the nonnegative range of the result type. (It cannot be the
1102 negative of the minimum signed value due to the range of the widening
1103 MINUS_EXPR.) */
1104 vect_unpromoted_value unprom_abs;
1105 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1106 &unprom_abs);
1108 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1109 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1110 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1111 Then check that plus_oprnd0 is defined by an abs_expr. */
1113 if (!plus_oprnd0)
1114 return NULL;
1116 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1117 if (!abs_stmt_vinfo)
1118 return NULL;
1120 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1121 inside the loop (in case we are analyzing an outer-loop). */
1122 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1123 if (!abs_stmt
1124 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1125 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1126 return NULL;
1128 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1129 tree abs_type = TREE_TYPE (abs_oprnd);
1130 if (TYPE_UNSIGNED (abs_type))
1131 return NULL;
1133 /* Peel off conversions from the ABS input. This can involve sign
1134 changes (e.g. from an unsigned subtraction to a signed ABS input)
1135 or signed promotion, but it can't include unsigned promotion.
1136 (Note that ABS of an unsigned promotion should have been folded
1137 away before now anyway.) */
1138 vect_unpromoted_value unprom_diff;
1139 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1140 &unprom_diff);
1141 if (!abs_oprnd)
1142 return NULL;
1143 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1144 && TYPE_UNSIGNED (unprom_diff.type))
1145 return NULL;
1147 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1148 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1149 if (!diff_stmt_vinfo)
1150 return NULL;
1152 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1153 inside the loop (in case we are analyzing an outer-loop). */
1154 vect_unpromoted_value unprom[2];
1155 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1156 false, 2, unprom, &half_type))
1157 return NULL;
1159 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1161 tree half_vectype;
1162 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1163 type_out, &half_vectype))
1164 return NULL;
1166 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1167 tree sad_oprnd[2];
1168 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1169 unprom, half_vectype);
1171 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1172 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1173 sad_oprnd[1], plus_oprnd1);
1175 return pattern_stmt;
1178 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1179 so that it can be treated as though it had the form:
1181 A_TYPE a;
1182 B_TYPE b;
1183 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1184 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1185 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1186 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1187 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1189 Try to replace the pattern with:
1191 A_TYPE a;
1192 B_TYPE b;
1193 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1194 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1195 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1196 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1198 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1200 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1201 name of the pattern being matched, for dump purposes. */
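/* An illustrative example for ORIG_CODE == MULT_EXPR and
   WIDE_CODE == WIDEN_MULT_EXPR (a sketch, not from the original sources):

     void
     widen_mult (int *restrict res, const short *restrict a,
                 const short *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         res[i] = (int) a[i] * (int) b[i];
     }

   Here HALF_TYPE is 'short', and the explicit promotions are replaced by
   a single WIDEN_MULT_EXPR on the narrow inputs.  */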
1203 static gimple *
1204 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1205 tree_code orig_code, tree_code wide_code,
1206 bool shift_p, const char *name)
1208 vec_info *vinfo = last_stmt_info->vinfo;
1209 gimple *last_stmt = last_stmt_info->stmt;
1211 vect_unpromoted_value unprom[2];
1212 tree half_type;
1213 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1214 shift_p, 2, unprom, &half_type))
1215 return NULL;
1217 /* Pattern detected. */
1218 vect_pattern_detected (name, last_stmt);
1220 tree type = gimple_expr_type (last_stmt);
1221 tree itype = type;
1222 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1223 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1224 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1225 TYPE_UNSIGNED (half_type));
1227 /* Check target support */
1228 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1229 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1230 enum tree_code dummy_code;
1231 int dummy_int;
1232 auto_vec<tree> dummy_vec;
1233 if (!vectype
1234 || !vecitype
1235 || !supportable_widening_operation (wide_code, last_stmt_info,
1236 vecitype, vectype,
1237 &dummy_code, &dummy_code,
1238 &dummy_int, &dummy_vec))
1239 return NULL;
1241 *type_out = get_vectype_for_scalar_type (vinfo, type);
1242 if (!*type_out)
1243 return NULL;
1245 tree oprnd[2];
1246 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1248 tree var = vect_recog_temp_ssa_var (itype, NULL);
1249 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1250 oprnd[0], oprnd[1]);
1252 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1255 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1256 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1258 static gimple *
1259 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1261 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1262 WIDEN_MULT_EXPR, false,
1263 "vect_recog_widen_mult_pattern");
1266 /* Function vect_recog_pow_pattern
1268 Try to find the following pattern:
1270 x = POW (y, N);
1272 with POW being one of pow, powf, powi, powif and N being
1273 either 2 or 0.5.
1275 Input:
1277 * STMT_VINFO: The stmt from which the pattern search begins.
1279 Output:
1281 * TYPE_OUT: The type of the output of this pattern.
1283 * Return value: A new stmt that will be used to replace the sequence of
1284 stmts that constitute the pattern. In this case it will be:
1285 x = x * x
1287 x = sqrt (x)
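/* An illustrative example (a sketch, not from the original sources):

     #include <math.h>

     void
     square_all (double *restrict out, const double *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = pow (in[i], 2.0);     /* becomes in[i] * in[i] */
     }

   Likewise pow (in[i], 0.5) is turned into sqrt (in[i]) when the target
   supports a vector square root.  */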
1290 static gimple *
1291 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1293 vec_info *vinfo = stmt_vinfo->vinfo;
1294 gimple *last_stmt = stmt_vinfo->stmt;
1295 tree base, exp;
1296 gimple *stmt;
1297 tree var;
1299 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1300 return NULL;
1302 switch (gimple_call_combined_fn (last_stmt))
1304 CASE_CFN_POW:
1305 CASE_CFN_POWI:
1306 break;
1308 default:
1309 return NULL;
1312 base = gimple_call_arg (last_stmt, 0);
1313 exp = gimple_call_arg (last_stmt, 1);
1314 if (TREE_CODE (exp) != REAL_CST
1315 && TREE_CODE (exp) != INTEGER_CST)
1317 if (flag_unsafe_math_optimizations
1318 && TREE_CODE (base) == REAL_CST
1319 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1321 combined_fn log_cfn;
1322 built_in_function exp_bfn;
1323 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1325 case BUILT_IN_POW:
1326 log_cfn = CFN_BUILT_IN_LOG;
1327 exp_bfn = BUILT_IN_EXP;
1328 break;
1329 case BUILT_IN_POWF:
1330 log_cfn = CFN_BUILT_IN_LOGF;
1331 exp_bfn = BUILT_IN_EXPF;
1332 break;
1333 case BUILT_IN_POWL:
1334 log_cfn = CFN_BUILT_IN_LOGL;
1335 exp_bfn = BUILT_IN_EXPL;
1336 break;
1337 default:
1338 return NULL;
1340 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1341 tree exp_decl = builtin_decl_implicit (exp_bfn);
1342 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1343 does that, but if C is a power of 2, we want to use
1344 exp2 (log2 (C) * x) in the non-vectorized version, but for
1345 vectorization we don't have vectorized exp2. */
1346 if (logc
1347 && TREE_CODE (logc) == REAL_CST
1348 && exp_decl
1349 && lookup_attribute ("omp declare simd",
1350 DECL_ATTRIBUTES (exp_decl)))
1352 cgraph_node *node = cgraph_node::get_create (exp_decl);
1353 if (node->simd_clones == NULL)
1355 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1356 || node->definition)
1357 return NULL;
1358 expand_simd_clones (node);
1359 if (node->simd_clones == NULL)
1360 return NULL;
1362 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1363 if (!*type_out)
1364 return NULL;
1365 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1366 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1367 append_pattern_def_seq (stmt_vinfo, g);
1368 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1369 g = gimple_build_call (exp_decl, 1, def);
1370 gimple_call_set_lhs (g, res);
1371 return g;
1375 return NULL;
1378 /* We now have a pow or powi builtin function call with a constant
1379 exponent. */
1381 /* Catch squaring. */
1382 if ((tree_fits_shwi_p (exp)
1383 && tree_to_shwi (exp) == 2)
1384 || (TREE_CODE (exp) == REAL_CST
1385 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1387 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1388 TREE_TYPE (base), type_out))
1389 return NULL;
1391 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1392 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1393 return stmt;
1396 /* Catch square root. */
1397 if (TREE_CODE (exp) == REAL_CST
1398 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1400 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1401 if (*type_out
1402 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1403 OPTIMIZE_FOR_SPEED))
1405 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1406 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1407 gimple_call_set_lhs (stmt, var);
1408 gimple_call_set_nothrow (stmt, true);
1409 return stmt;
1413 return NULL;
1417 /* Function vect_recog_widen_sum_pattern
1419 Try to find the following pattern:
1421 type x_t;
1422 TYPE x_T, sum = init;
1423 loop:
1424 sum_0 = phi <init, sum_1>
1425 S1 x_t = *p;
1426 S2 x_T = (TYPE) x_t;
1427 S3 sum_1 = x_T + sum_0;
1429 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1430 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1431 a special case of a reduction computation.
1433 Input:
1435 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1436 when this function is called with S3, the pattern {S2,S3} will be detected.
1438 Output:
1440 * TYPE_OUT: The type of the output of this pattern.
1442 * Return value: A new stmt that will be used to replace the sequence of
1443 stmts that constitute the pattern. In this case it will be:
1444 WIDEN_SUM <x_t, sum_0>
1446 Note: The widening-sum idiom is a widening reduction pattern that is
1447 vectorized without preserving all the intermediate results. It
1448 produces only N/2 (widened) results (by summing up pairs of
1449 intermediate results) rather than all N results. Therefore, we
1450 cannot allow this pattern when we want to get all the results and in
1451 the correct order (as is the case when this computation is in an
1452 inner-loop nested in an outer-loop that is being vectorized). */
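/* An illustrative example of a source loop that matches this pattern
   (a sketch, not taken from the original sources):

     unsigned char x[256];

     int
     widen_sum (void)
     {
       int sum = 0;
       for (int i = 0; i < 256; i++)
         sum += x[i];           /* S2 and S3: widen, then accumulate.  */
       return sum;
     }

   The 8-bit elements are summed into a 32-bit accumulator, which becomes
   WIDEN_SUM <x_t, sum_0> when the target provides a widening-sum optab.  */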
1454 static gimple *
1455 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1457 gimple *last_stmt = stmt_vinfo->stmt;
1458 tree oprnd0, oprnd1;
1459 vec_info *vinfo = stmt_vinfo->vinfo;
1460 tree type;
1461 gimple *pattern_stmt;
1462 tree var;
1464 /* Look for the following pattern
1465 DX = (TYPE) X;
1466 sum_1 = DX + sum_0;
1467 In which DX is at least double the size of X, and sum_1 has been
1468 recognized as a reduction variable.
1471 /* Starting from LAST_STMT, follow the defs of its uses in search
1472 of the above pattern. */
1474 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1475 &oprnd0, &oprnd1))
1476 return NULL;
1478 type = gimple_expr_type (last_stmt);
1480 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1481 we know that oprnd1 is the reduction variable (defined by a loop-header
1482 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1483 Left to check that oprnd0 is defined by a cast from type 'type' to type
1484 'TYPE'. */
1486 vect_unpromoted_value unprom0;
1487 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1488 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1489 return NULL;
1491 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1493 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1494 unprom0.type, type_out))
1495 return NULL;
1497 var = vect_recog_temp_ssa_var (type, NULL);
1498 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1500 return pattern_stmt;
1503 /* Recognize cases in which an operation is performed in one type WTYPE
1504 but could be done more efficiently in a narrower type NTYPE. For example,
1505 if we have:
1507 ATYPE a; // narrower than NTYPE
1508 BTYPE b; // narrower than NTYPE
1509 WTYPE aw = (WTYPE) a;
1510 WTYPE bw = (WTYPE) b;
1511 WTYPE res = aw + bw; // only uses of aw and bw
1513 then it would be more efficient to do:
1515 NTYPE an = (NTYPE) a;
1516 NTYPE bn = (NTYPE) b;
1517 NTYPE resn = an + bn;
1518 WTYPE res = (WTYPE) resn;
1520 Other situations include things like:
1522 ATYPE a; // NTYPE or narrower
1523 WTYPE aw = (WTYPE) a;
1524 WTYPE res = aw + b;
1526 when only "(NTYPE) res" is significant. In that case it's more efficient
1527 to truncate "b" and do the operation on NTYPE instead:
1529 NTYPE an = (NTYPE) a;
1530 NTYPE bn = (NTYPE) b; // truncation
1531 NTYPE resn = an + bn;
1532 WTYPE res = (WTYPE) resn;
1534 All users of "res" should then use "resn" instead, making the final
1535 statement dead (not marked as relevant). The final statement is still
1536 needed to maintain the type correctness of the IR.
1538 vect_determine_precisions has already determined the minimum
1539 precision of the operation and the minimum precision required
1540 by users of the result. */
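/* An illustrative example (a sketch, not from the original sources):

     void
     narrow_add (unsigned short *restrict res,
                 const unsigned char *restrict a,
                 const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         res[i] = a[i] + b[i];
     }

   The C integer promotions make the addition a 32-bit operation (WTYPE),
   but only the low 16 bits of each result are stored, so the addition can
   be done on 16-bit elements (NTYPE) instead, leaving only a dead widening
   back to 'int'.  */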
1542 static gimple *
1543 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1545 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1546 if (!last_stmt)
1547 return NULL;
1549 /* See whether we have found that this operation can be done on a
1550 narrower type without changing its semantics. */
1551 unsigned int new_precision = last_stmt_info->operation_precision;
1552 if (!new_precision)
1553 return NULL;
1555 vec_info *vinfo = last_stmt_info->vinfo;
1556 tree lhs = gimple_assign_lhs (last_stmt);
1557 tree type = TREE_TYPE (lhs);
1558 tree_code code = gimple_assign_rhs_code (last_stmt);
1560 /* Keep the first operand of a COND_EXPR as-is: only the other two
1561 operands are interesting. */
1562 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1564 /* Check the operands. */
1565 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1566 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1567 unprom.quick_grow (nops);
1568 unsigned int min_precision = 0;
1569 bool single_use_p = false;
1570 for (unsigned int i = 0; i < nops; ++i)
1572 tree op = gimple_op (last_stmt, first_op + i);
1573 if (TREE_CODE (op) == INTEGER_CST)
1574 unprom[i].set_op (op, vect_constant_def);
1575 else if (TREE_CODE (op) == SSA_NAME)
1577 bool op_single_use_p = true;
1578 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1579 &op_single_use_p))
1580 return NULL;
1581 /* If:
1583 (1) N bits of the result are needed;
1584 (2) all inputs are widened from M<N bits; and
1585 (3) one operand OP is a single-use SSA name
1587 we can shift the M->N widening from OP to the output
1588 without changing the number or type of extensions involved.
1589 This then reduces the number of copies of STMT_INFO.
1591 If instead of (3) more than one operand is a single-use SSA name,
1592 shifting the extension to the output is even more of a win.
1594 If instead:
1596 (1) N bits of the result are needed;
1597 (2) one operand OP2 is widened from M2<N bits;
1598 (3) another operand OP1 is widened from M1<M2 bits; and
1599 (4) both OP1 and OP2 are single-use
1601 the choice is between:
1603 (a) truncating OP2 to M1, doing the operation on M1,
1604 and then widening the result to N
1606 (b) widening OP1 to M2, doing the operation on M2, and then
1607 widening the result to N
1609 Both shift the M2->N widening of the inputs to the output.
1610 (a) additionally shifts the M1->M2 widening to the output;
1611 it requires fewer copies of STMT_INFO but requires an extra
1612 M2->M1 truncation.
1614 Which is better will depend on the complexity and cost of
1615 STMT_INFO, which is hard to predict at this stage. However,
1616 a clear tie-breaker in favor of (b) is the fact that the
1617 truncation in (a) increases the length of the operation chain.
1619 If instead of (4) only one of OP1 or OP2 is single-use,
1620 (b) is still a win over doing the operation in N bits:
1621 it still shifts the M2->N widening on the single-use operand
1622 to the output and reduces the number of STMT_INFO copies.
1624 If neither operand is single-use then operating on fewer than
1625 N bits might lead to more extensions overall. Whether it does
1626 or not depends on global information about the vectorization
1627 region, and whether that's a good trade-off would again
1628 depend on the complexity and cost of the statements involved,
1629 as well as things like register pressure that are not normally
1630 modelled at this stage. We therefore ignore these cases
1631 and just optimize the clear single-use wins above.
1633 Thus we take the maximum precision of the unpromoted operands
1634 and record whether any operand is single-use. */
1635 if (unprom[i].dt == vect_internal_def)
1637 min_precision = MAX (min_precision,
1638 TYPE_PRECISION (unprom[i].type));
1639 single_use_p |= op_single_use_p;
1644 /* Although the operation could be done in operation_precision, we have
1645 to balance that against introducing extra truncations or extensions.
1646 Calculate the minimum precision that can be handled efficiently.
1648 The loop above determined that the operation could be handled
1649 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1650 extension from the inputs to the output without introducing more
1651 instructions, and would reduce the number of instructions required
1652 for STMT_INFO itself.
1654 vect_determine_precisions has also determined that the result only
1655 needs min_output_precision bits. Truncating by a factor of N times
1656 requires a tree of N - 1 instructions, so if TYPE is N times wider
1657 than min_output_precision, doing the operation in TYPE and truncating
1658 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1659 In contrast:
1661 - truncating the input to a unary operation and doing the operation
1662 in the new type requires at most N - 1 + 1 = N instructions per
1663 output vector
1665 - doing the same for a binary operation requires at most
1666 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1668 Both unary and binary operations require fewer instructions than
1669 this if the operands were extended from a suitable truncated form.
1670 Thus there is usually nothing to lose by doing operations in
1671 min_output_precision bits, but there can be something to gain. */
1672 if (!single_use_p)
1673 min_precision = last_stmt_info->min_output_precision;
1674 else
1675 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1677 /* Apply the minimum efficient precision we just calculated. */
1678 if (new_precision < min_precision)
1679 new_precision = min_precision;
1680 if (new_precision >= TYPE_PRECISION (type))
1681 return NULL;
1683 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1685 *type_out = get_vectype_for_scalar_type (vinfo, type);
1686 if (!*type_out)
1687 return NULL;
1689 /* We've found a viable pattern. Get the new type of the operation. */
1690 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1691 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1693 /* If we're truncating an operation, we need to make sure that we
1694 don't introduce new undefined overflow. The codes tested here are
1695 a subset of those accepted by vect_truncatable_operation_p. */
1696 tree op_type = new_type;
1697 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1698 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1699 op_type = build_nonstandard_integer_type (new_precision, true);
1701 /* We specifically don't check here whether the target supports the
1702 new operation, since it might be something that a later pattern
1703 wants to rewrite anyway. If targets have a minimum element size
1704 for some optabs, we should pattern-match smaller ops to larger ops
1705 where beneficial. */
1706 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1707 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1708 if (!new_vectype || !op_vectype)
1709 return NULL;
1711 if (dump_enabled_p ())
1712 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1713 type, new_type);
1715 /* Calculate the rhs operands for an operation on OP_TYPE. */
1716 tree ops[3] = {};
1717 for (unsigned int i = 1; i < first_op; ++i)
1718 ops[i - 1] = gimple_op (last_stmt, i);
1719 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1720 op_type, &unprom[0], op_vectype);
1722 /* Use the operation to produce a result of type OP_TYPE. */
1723 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1724 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1725 ops[0], ops[1], ops[2]);
1726 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1728 if (dump_enabled_p ())
1729 dump_printf_loc (MSG_NOTE, vect_location,
1730 "created pattern stmt: %G", pattern_stmt);
1732 /* Convert back to the original signedness, if OP_TYPE is different
1733 from NEW_TYPE. */
1734 if (op_type != new_type)
1735 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1736 pattern_stmt, op_vectype);
1738 /* Promote the result to the original type. */
1739 pattern_stmt = vect_convert_output (last_stmt_info, type,
1740 pattern_stmt, new_vectype);
1742 return pattern_stmt;
1745 /* Recognize the following patterns:
1747 ATYPE a; // narrower than TYPE
1748 BTYPE b; // narrower than TYPE
1750 1) Multiply high with scaling
1751 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1752 2) ... or also with rounding
1753 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1755 where only the bottom half of res is used. */
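/* For instance, with 16-bit inputs, a 32-bit TYPE and a 16-bit result,
   the two forms correspond to scalar loops of roughly this shape (the
   shift counts 15 and 14 are the ones that satisfy the precision checks
   below for this particular choice of types):

     void f1 (short *restrict r, short *restrict a, short *restrict b, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = ((int) a[i] * b[i]) >> 15;                // form 1
     }

     void f2 (short *restrict r, short *restrict a, short *restrict b, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = ((((int) a[i] * b[i]) >> 14) + 1) >> 1;   // form 2
     }  */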
1757 static gimple *
1758 vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1760 /* Check for a right shift. */
1761 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1762 if (!last_stmt
1763 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1764 return NULL;
1765 vec_info *vinfo = last_stmt_info->vinfo;
1767 /* Check that the shift result is wider than the users of the
1768 result need (i.e. that narrowing would be a natural choice). */
1769 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1770 unsigned int target_precision
1771 = vect_element_precision (last_stmt_info->min_output_precision);
1772 if (!INTEGRAL_TYPE_P (lhs_type)
1773 || target_precision >= TYPE_PRECISION (lhs_type))
1774 return NULL;
1776 /* Look through any change in sign on the outer shift input. */
1777 vect_unpromoted_value unprom_rshift_input;
1778 tree rshift_input = vect_look_through_possible_promotion
1779 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1780 if (!rshift_input
1781 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1782 != TYPE_PRECISION (lhs_type))
1783 return NULL;
1785 /* Get the definition of the shift input. */
1786 stmt_vec_info rshift_input_stmt_info
1787 = vect_get_internal_def (vinfo, rshift_input);
1788 if (!rshift_input_stmt_info)
1789 return NULL;
1790 gassign *rshift_input_stmt
1791 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1792 if (!rshift_input_stmt)
1793 return NULL;
1795 stmt_vec_info mulh_stmt_info;
1796 tree scale_term;
1797 internal_fn ifn;
1798 unsigned int expect_offset;
1800 /* Check for the presence of the rounding term. */
1801 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1803 /* Check that the outer shift was by 1. */
1804 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1805 return NULL;
1807 /* Check that the second operand of the PLUS_EXPR is 1. */
1808 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1809 return NULL;
1811 /* Look through any change in sign on the addition input. */
1812 vect_unpromoted_value unprom_plus_input;
1813 tree plus_input = vect_look_through_possible_promotion
1814 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1815 if (!plus_input
1816 || TYPE_PRECISION (TREE_TYPE (plus_input))
1817 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1818 return NULL;
1820 /* Get the definition of the multiply-high-scale part. */
1821 stmt_vec_info plus_input_stmt_info
1822 = vect_get_internal_def (vinfo, plus_input);
1823 if (!plus_input_stmt_info)
1824 return NULL;
1825 gassign *plus_input_stmt
1826 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1827 if (!plus_input_stmt
1828 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1829 return NULL;
1831 /* Look through any change in sign on the scaling input. */
1832 vect_unpromoted_value unprom_scale_input;
1833 tree scale_input = vect_look_through_possible_promotion
1834 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1835 if (!scale_input
1836 || TYPE_PRECISION (TREE_TYPE (scale_input))
1837 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1838 return NULL;
1840 /* Get the definition of the multiply-high part. */
1841 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1842 if (!mulh_stmt_info)
1843 return NULL;
1845 /* Get the scaling term. */
1846 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1848 expect_offset = target_precision + 2;
1849 ifn = IFN_MULHRS;
1851 else
1853 mulh_stmt_info = rshift_input_stmt_info;
1854 scale_term = gimple_assign_rhs2 (last_stmt);
1856 expect_offset = target_precision + 1;
1857 ifn = IFN_MULHS;
1860 /* Check that the scaling factor is correct. */
1861 if (TREE_CODE (scale_term) != INTEGER_CST
1862 || wi::to_widest (scale_term) + expect_offset
1863 != TYPE_PRECISION (lhs_type))
1864 return NULL;
1866 /* Check whether the scaling input term can be seen as two widened
1867 inputs multiplied together. */
1868 vect_unpromoted_value unprom_mult[2];
1869 tree new_type;
1870 unsigned int nops
1871 = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1872 false, 2, unprom_mult, &new_type);
1873 if (nops != 2)
1874 return NULL;
1876 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1878 /* Adjust output precision. */
1879 if (TYPE_PRECISION (new_type) < target_precision)
1880 new_type = build_nonstandard_integer_type
1881 (target_precision, TYPE_UNSIGNED (new_type));
1883 /* Check for target support. */
1884 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1885 if (!new_vectype
1886 || !direct_internal_fn_supported_p
1887 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1888 return NULL;
1890 /* The IR requires a valid vector type for the cast result, even though
1891 it's likely to be discarded. */
1892 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1893 if (!*type_out)
1894 return NULL;
1896 /* Generate the IFN_MULHS or IFN_MULHRS call. */
1897 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1898 tree new_ops[2];
1899 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1900 unprom_mult, new_vectype);
1901 gcall *mulhrs_stmt
1902 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1903 gimple_call_set_lhs (mulhrs_stmt, new_var);
1904 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_NOTE, vect_location,
1908 "created pattern stmt: %G", mulhrs_stmt);
1910 return vect_convert_output (last_stmt_info, lhs_type,
1911 mulhrs_stmt, new_vectype);
1914 /* Recognize the patterns:
1916 ATYPE a; // narrower than TYPE
1917 BTYPE b; // narrower than TYPE
1918 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1919 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1921 where only the bottom half of avg is used. Try to transform them into:
1923 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1924 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1926 followed by:
1928 TYPE avg = (TYPE) avg';
1930 where NTYPE is no wider than half of TYPE. Since only the bottom half
1931 of avg is used, all or part of the cast of avg' should become redundant.
1933 If there is no target support available, generate code to distribute rshift
1934 over plus and add a carry. */
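/* A typical source form, assuming 8-bit inputs and an 8-bit result (so
   TYPE is the promoted 32-bit int and NTYPE can be 8 or 16 bits):

     void avg (unsigned char *restrict r, unsigned char *restrict a,
               unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = (a[i] + b[i] + 1) >> 1;   // form (2) -> .AVG_CEIL
     }

   Dropping the "+ 1" gives form (1) and .AVG_FLOOR instead.  */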
1936 static gimple *
1937 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1939 /* Check for a shift right by one bit. */
1940 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1941 vec_info *vinfo = last_stmt_info->vinfo;
1942 if (!last_stmt
1943 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1944 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1945 return NULL;
1947 /* Check that the shift result is wider than the users of the
1948 result need (i.e. that narrowing would be a natural choice). */
1949 tree lhs = gimple_assign_lhs (last_stmt);
1950 tree type = TREE_TYPE (lhs);
1951 unsigned int target_precision
1952 = vect_element_precision (last_stmt_info->min_output_precision);
1953 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1954 return NULL;
1956 /* Look through any change in sign on the shift input. */
1957 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1958 vect_unpromoted_value unprom_plus;
1959 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1960 &unprom_plus);
1961 if (!rshift_rhs
1962 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1963 return NULL;
1965 /* Get the definition of the shift input. */
1966 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1967 if (!plus_stmt_info)
1968 return NULL;
1970 /* Check whether the shift input can be seen as a tree of additions on
1971 2 or 3 widened inputs.
1973 Note that the pattern should be a win even if the result of one or
1974 more additions is reused elsewhere: if the pattern matches, we'd be
1975 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1976 internal_fn ifn = IFN_AVG_FLOOR;
1977 vect_unpromoted_value unprom[3];
1978 tree new_type;
1979 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1980 PLUS_EXPR, false, 3,
1981 unprom, &new_type);
1982 if (nops == 0)
1983 return NULL;
1984 if (nops == 3)
1986 /* Check that one operand is 1. */
1987 unsigned int i;
1988 for (i = 0; i < 3; ++i)
1989 if (integer_onep (unprom[i].op))
1990 break;
1991 if (i == 3)
1992 return NULL;
1993 /* Throw away the 1 operand and keep the other two. */
1994 if (i < 2)
1995 unprom[i] = unprom[2];
1996 ifn = IFN_AVG_CEIL;
1999 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2001 /* We know that:
2003 (a) the operation can be viewed as:
2005 TYPE widened0 = (TYPE) UNPROM[0];
2006 TYPE widened1 = (TYPE) UNPROM[1];
2007 TYPE tmp1 = widened0 + widened1 {+ 1};
2008 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2010 (b) the first two statements are equivalent to:
2012 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2013 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2015 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2016 where sensible;
2018 (d) all the operations can be performed correctly at twice the width of
2019 NEW_TYPE, due to the nature of the average operation; and
2021 (e) users of the result of the right shift need only TARGET_PRECISION
2022 bits, where TARGET_PRECISION is no more than half of TYPE's
2023 precision.
2025 Under these circumstances, the only situation in which NEW_TYPE
2026 could be narrower than TARGET_PRECISION is if widened0, widened1
2027 and an addition result are all used more than once. Thus we can
2028 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2029 as "free", whereas widening the result of the average instruction
2030 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2031 therefore better not to go narrower than TARGET_PRECISION. */
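/* For example, with 8-bit inputs (NEW_TYPE) and a 16-bit TARGET_PRECISION,
   we prefer a 16-bit average over an 8-bit average whose result would
   then need a separate 8->16 extension.  */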
2032 if (TYPE_PRECISION (new_type) < target_precision)
2033 new_type = build_nonstandard_integer_type (target_precision,
2034 TYPE_UNSIGNED (new_type));
2036 /* Check for target support. */
2037 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2038 if (!new_vectype)
2039 return NULL;
2041 bool fallback_p = false;
2043 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2044 ;
2045 else if (TYPE_UNSIGNED (new_type)
2046 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2047 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2048 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2049 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2050 fallback_p = true;
2051 else
2052 return NULL;
2054 /* The IR requires a valid vector type for the cast result, even though
2055 it's likely to be discarded. */
2056 *type_out = get_vectype_for_scalar_type (vinfo, type);
2057 if (!*type_out)
2058 return NULL;
2060 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2061 tree new_ops[2];
2062 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
2063 unprom, new_vectype);
2065 if (fallback_p)
2067 /* As a fallback, generate code for the following sequence:
2069 shifted_op0 = new_ops[0] >> 1;
2070 shifted_op1 = new_ops[1] >> 1;
2071 sum_of_shifted = shifted_op0 + shifted_op1;
2072 unmasked_carry = new_ops[0] and/or new_ops[1];
2073 carry = unmasked_carry & 1;
2074 new_var = sum_of_shifted + carry;  */
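/* For example, for .AVG_CEIL on the 8-bit operands 255 and 254,
   (255 + 254 + 1) >> 1 would overflow 8 bits, but
   (255 >> 1) + (254 >> 1) + ((255 | 254) & 1) = 127 + 127 + 1 = 255,
   the expected result.  .AVG_FLOOR uses AND instead of IOR, picking up
   the carry only when both low bits are set.  */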
2077 tree one_cst = build_one_cst (new_type);
2078 gassign *g;
2080 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2081 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2082 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2084 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2085 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2086 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2088 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2089 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2090 shifted_op0, shifted_op1);
2091 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2093 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2094 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2095 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2096 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2098 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2099 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2100 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2102 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2103 return vect_convert_output (last_stmt_info, type, g, new_vectype);
2106 /* Generate the IFN_AVG* call. */
2107 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2108 new_ops[1]);
2109 gimple_call_set_lhs (average_stmt, new_var);
2110 gimple_set_location (average_stmt, gimple_location (last_stmt));
2112 if (dump_enabled_p ())
2113 dump_printf_loc (MSG_NOTE, vect_location,
2114 "created pattern stmt: %G", average_stmt);
2116 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
2119 /* Recognize cases in which the input to a cast is wider than its
2120 output, and the input is fed by a widening operation. Fold this
2121 by removing the unnecessary intermediate widening. E.g.:
2123 unsigned char a;
2124 unsigned int b = (unsigned int) a;
2125 unsigned short c = (unsigned short) b;
2127 -->
2129 unsigned short c = (unsigned short) a;
2131 Although this is rare in input IR, it is an expected side-effect
2132 of the over-widening pattern above.
2134 This is beneficial also for integer-to-float conversions, if the
2135 widened integer has more bits than the float, and if the unwidened
2136 input doesn't. */
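/* For the floating-point case, assuming a 32-bit float:

     unsigned char a;
     unsigned long long b = (unsigned long long) a;
     float f = (float) b;

   can become "float f = (float) a;": the 64-bit intermediate has more
   bits than the float, but the original 8-bit value does not.  */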
2138 static gimple *
2139 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2141 /* Check for a cast, including an integer-to-float conversion. */
2142 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2143 if (!last_stmt)
2144 return NULL;
2145 tree_code code = gimple_assign_rhs_code (last_stmt);
2146 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2147 return NULL;
2149 /* Make sure that the rhs is a scalar with a natural bitsize. */
2150 tree lhs = gimple_assign_lhs (last_stmt);
2151 if (!lhs)
2152 return NULL;
2153 tree lhs_type = TREE_TYPE (lhs);
2154 scalar_mode lhs_mode;
2155 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2156 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2157 return NULL;
2159 /* Check for a narrowing operation (from a vector point of view). */
2160 tree rhs = gimple_assign_rhs1 (last_stmt);
2161 tree rhs_type = TREE_TYPE (rhs);
2162 if (!INTEGRAL_TYPE_P (rhs_type)
2163 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2164 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2165 return NULL;
2167 /* Try to find an unpromoted input. */
2168 vec_info *vinfo = last_stmt_info->vinfo;
2169 vect_unpromoted_value unprom;
2170 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2171 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2172 return NULL;
2174 /* If the bits above RHS_TYPE matter, make sure that they're the
2175 same when extending from UNPROM as they are when extending from RHS. */
2176 if (!INTEGRAL_TYPE_P (lhs_type)
2177 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2178 return NULL;
2180 /* We can get the same result by casting UNPROM directly, to avoid
2181 the unnecessary widening and narrowing. */
2182 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2184 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2185 if (!*type_out)
2186 return NULL;
2188 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2189 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2190 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2192 return pattern_stmt;
2195 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2196 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2198 static gimple *
2199 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2201 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
2202 WIDEN_LSHIFT_EXPR, true,
2203 "vect_recog_widen_shift_pattern");
2206 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2208 type a_t, b_t, c_t;
2210 S0 a_t = b_t r<< c_t;
2212 Input/Output:
2214 * STMT_VINFO: The stmt from which the pattern search begins,
2215 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2216 with a sequence:
2218 S1 d_t = -c_t;
2219 S2 e_t = d_t & (B - 1);
2220 S3 f_t = b_t << c_t;
2221 S4 g_t = b_t >> e_t;
2222 S0 a_t = f_t | g_t;
2224 where B is element bitsize of type.
2226 Output:
2228 * TYPE_OUT: The type of the output of this pattern.
2230 * Return value: A new stmt that will be used to replace the rotate
2231 S0 stmt. */
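/* As a worked example, for a 32-bit b_t rotated left by c_t = 5 the
   sequence computes d_t = -5, e_t = -5 & 31 = 27 and then
   (b_t << 5) | (b_t >> 27), which equals b_t r<< 5.  */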
2233 static gimple *
2234 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2236 gimple *last_stmt = stmt_vinfo->stmt;
2237 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2238 gimple *pattern_stmt, *def_stmt;
2239 enum tree_code rhs_code;
2240 vec_info *vinfo = stmt_vinfo->vinfo;
2241 enum vect_def_type dt;
2242 optab optab1, optab2;
2243 edge ext_def = NULL;
2244 bool bswap16_p = false;
2246 if (is_gimple_assign (last_stmt))
2248 rhs_code = gimple_assign_rhs_code (last_stmt);
2249 switch (rhs_code)
2251 case LROTATE_EXPR:
2252 case RROTATE_EXPR:
2253 break;
2254 default:
2255 return NULL;
2258 lhs = gimple_assign_lhs (last_stmt);
2259 oprnd0 = gimple_assign_rhs1 (last_stmt);
2260 type = TREE_TYPE (oprnd0);
2261 oprnd1 = gimple_assign_rhs2 (last_stmt);
2263 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2265 /* __builtin_bswap16 (x) is another form of x r>> 8.
2266 The vectorizer has bswap support, but only if the argument isn't
2267 promoted. */
2268 lhs = gimple_call_lhs (last_stmt);
2269 oprnd0 = gimple_call_arg (last_stmt, 0);
2270 type = TREE_TYPE (oprnd0);
2271 if (!lhs
2272 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2273 || TYPE_PRECISION (type) <= 16
2274 || TREE_CODE (oprnd0) != SSA_NAME
2275 || BITS_PER_UNIT != 8
2276 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2277 return NULL;
2279 stmt_vec_info def_stmt_info;
2280 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2281 return NULL;
2283 if (dt != vect_internal_def)
2284 return NULL;
2286 if (gimple_assign_cast_p (def_stmt))
2288 def = gimple_assign_rhs1 (def_stmt);
2289 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2290 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2291 oprnd0 = def;
2294 type = TREE_TYPE (lhs);
2295 vectype = get_vectype_for_scalar_type (vinfo, type);
2296 if (vectype == NULL_TREE)
2297 return NULL;
2299 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2301 /* The encoding uses one stepped pattern for each byte in the
2302 16-bit word. */
2303 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2304 for (unsigned i = 0; i < 3; ++i)
2305 for (unsigned j = 0; j < 2; ++j)
2306 elts.quick_push ((i + 1) * 2 - j - 1);
2308 vec_perm_indices indices (elts, 1,
2309 TYPE_VECTOR_SUBPARTS (char_vectype));
2310 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2312 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2313 undo the argument promotion. */
2314 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2316 def = vect_recog_temp_ssa_var (type, NULL);
2317 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2318 append_pattern_def_seq (stmt_vinfo, def_stmt);
2319 oprnd0 = def;
2322 /* Pattern detected. */
2323 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2325 *type_out = vectype;
2327 /* Pattern supported. Create a stmt to be used to replace the
2328 pattern, with the unpromoted argument. */
2329 var = vect_recog_temp_ssa_var (type, NULL);
2330 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2331 1, oprnd0);
2332 gimple_call_set_lhs (pattern_stmt, var);
2333 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2334 gimple_call_fntype (last_stmt));
2335 return pattern_stmt;
2339 oprnd1 = build_int_cst (integer_type_node, 8);
2340 rhs_code = LROTATE_EXPR;
2341 bswap16_p = true;
2343 else
2344 return NULL;
2346 if (TREE_CODE (oprnd0) != SSA_NAME
2347 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2348 || !INTEGRAL_TYPE_P (type)
2349 || !TYPE_UNSIGNED (type))
2350 return NULL;
2352 stmt_vec_info def_stmt_info;
2353 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2354 return NULL;
2356 if (dt != vect_internal_def
2357 && dt != vect_constant_def
2358 && dt != vect_external_def)
2359 return NULL;
2361 vectype = get_vectype_for_scalar_type (vinfo, type);
2362 if (vectype == NULL_TREE)
2363 return NULL;
2365 /* If vector/vector or vector/scalar rotate is supported by the target,
2366 don't do anything here. */
2367 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2368 if (optab1
2369 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2371 use_rotate:
2372 if (bswap16_p)
2374 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2376 def = vect_recog_temp_ssa_var (type, NULL);
2377 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2378 append_pattern_def_seq (stmt_vinfo, def_stmt);
2379 oprnd0 = def;
2382 /* Pattern detected. */
2383 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2385 *type_out = vectype;
2387 /* Pattern supported. Create a stmt to be used to replace the
2388 pattern. */
2389 var = vect_recog_temp_ssa_var (type, NULL);
2390 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2391 oprnd1);
2392 return pattern_stmt;
2394 return NULL;
2397 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2399 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2400 if (optab2
2401 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2402 goto use_rotate;
2405 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2406 don't do anything here either. */
2407 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2408 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2409 if (!optab1
2410 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2411 || !optab2
2412 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2414 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2415 return NULL;
2416 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2417 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2418 if (!optab1
2419 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2420 || !optab2
2421 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2422 return NULL;
2425 *type_out = vectype;
2427 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2429 def = vect_recog_temp_ssa_var (type, NULL);
2430 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2431 append_pattern_def_seq (stmt_vinfo, def_stmt);
2432 oprnd0 = def;
2435 if (dt == vect_external_def
2436 && TREE_CODE (oprnd1) == SSA_NAME)
2437 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2439 def = NULL_TREE;
2440 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2441 if (TREE_CODE (oprnd1) == INTEGER_CST
2442 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2443 def = oprnd1;
2444 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2446 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2447 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2448 && TYPE_PRECISION (TREE_TYPE (rhs1))
2449 == TYPE_PRECISION (type))
2450 def = rhs1;
2453 if (def == NULL_TREE)
2455 def = vect_recog_temp_ssa_var (type, NULL);
2456 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2457 if (ext_def)
2459 basic_block new_bb
2460 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2461 gcc_assert (!new_bb);
2463 else
2464 append_pattern_def_seq (stmt_vinfo, def_stmt);
2466 stype = TREE_TYPE (def);
2467 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2469 if (TREE_CODE (def) == INTEGER_CST)
2471 if (!tree_fits_uhwi_p (def)
2472 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2473 || integer_zerop (def))
2474 return NULL;
2475 def2 = build_int_cst (stype,
2476 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2478 else
2480 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2482 if (vecstype == NULL_TREE)
2483 return NULL;
2484 def2 = vect_recog_temp_ssa_var (stype, NULL);
2485 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2486 if (ext_def)
2488 basic_block new_bb
2489 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2490 gcc_assert (!new_bb);
2492 else
2493 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2495 def2 = vect_recog_temp_ssa_var (stype, NULL);
2496 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2497 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2498 gimple_assign_lhs (def_stmt), mask);
2499 if (ext_def)
2501 basic_block new_bb
2502 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2503 gcc_assert (!new_bb);
2505 else
2506 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2509 var1 = vect_recog_temp_ssa_var (type, NULL);
2510 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2511 ? LSHIFT_EXPR : RSHIFT_EXPR,
2512 oprnd0, def);
2513 append_pattern_def_seq (stmt_vinfo, def_stmt);
2515 var2 = vect_recog_temp_ssa_var (type, NULL);
2516 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2517 ? RSHIFT_EXPR : LSHIFT_EXPR,
2518 oprnd0, def2);
2519 append_pattern_def_seq (stmt_vinfo, def_stmt);
2521 /* Pattern detected. */
2522 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2524 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2525 var = vect_recog_temp_ssa_var (type, NULL);
2526 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2528 return pattern_stmt;
2531 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2532 vectorized:
2534 type a_t;
2535 TYPE b_T, res_T;
2537 S1 a_t = ;
2538 S2 b_T = ;
2539 S3 res_T = b_T op a_t;
2541 where type 'TYPE' is a type with different size than 'type',
2542 and op is <<, >> or rotate.
2544 Also detect cases:
2546 type a_t;
2547 TYPE b_T, c_T, res_T;
2549 S0 c_T = ;
2550 S1 a_t = (type) c_T;
2551 S2 b_T = ;
2552 S3 res_T = b_T op a_t;
2554 Input/Output:
2556 * STMT_VINFO: The stmt from which the pattern search begins,
2557 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2558 with a shift/rotate which has same type on both operands, in the
2559 second case just b_T op c_T, in the first case with added cast
2560 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2562 Output:
2564 * TYPE_OUT: The type of the output of this pattern.
2566 * Return value: A new stmt that will be used to replace the shift/rotate
2567 S3 stmt. */
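/* As a concrete instance of the first form, TYPE might be long long and
   type int, so that S3 shifts a 64-bit value by a 32-bit amount.  Vector
   shifts by a vector want both operands in the same vector type, so the
   pattern converts a_t to TYPE (or, in the second case, reuses c_T,
   masked to a_t's precision when the cast was narrowing).  */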
2569 static gimple *
2570 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2571 tree *type_out)
2573 gimple *last_stmt = stmt_vinfo->stmt;
2574 tree oprnd0, oprnd1, lhs, var;
2575 gimple *pattern_stmt;
2576 enum tree_code rhs_code;
2577 vec_info *vinfo = stmt_vinfo->vinfo;
2579 if (!is_gimple_assign (last_stmt))
2580 return NULL;
2582 rhs_code = gimple_assign_rhs_code (last_stmt);
2583 switch (rhs_code)
2585 case LSHIFT_EXPR:
2586 case RSHIFT_EXPR:
2587 case LROTATE_EXPR:
2588 case RROTATE_EXPR:
2589 break;
2590 default:
2591 return NULL;
2594 lhs = gimple_assign_lhs (last_stmt);
2595 oprnd0 = gimple_assign_rhs1 (last_stmt);
2596 oprnd1 = gimple_assign_rhs2 (last_stmt);
2597 if (TREE_CODE (oprnd0) != SSA_NAME
2598 || TREE_CODE (oprnd1) != SSA_NAME
2599 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2600 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2601 || TYPE_PRECISION (TREE_TYPE (lhs))
2602 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2603 return NULL;
2605 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2606 if (!def_vinfo)
2607 return NULL;
2609 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2610 if (*type_out == NULL_TREE)
2611 return NULL;
2613 tree def = NULL_TREE;
2614 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2615 if (def_stmt && gimple_assign_cast_p (def_stmt))
2617 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2618 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2619 && TYPE_PRECISION (TREE_TYPE (rhs1))
2620 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2622 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2623 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2624 def = rhs1;
2625 else
2627 tree mask
2628 = build_low_bits_mask (TREE_TYPE (rhs1),
2629 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2630 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2631 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2632 tree vecstype = get_vectype_for_scalar_type (vinfo,
2633 TREE_TYPE (rhs1));
2634 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2639 if (def == NULL_TREE)
2641 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2642 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2643 append_pattern_def_seq (stmt_vinfo, def_stmt);
2646 /* Pattern detected. */
2647 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2649 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2650 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2651 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2653 return pattern_stmt;
2656 /* Return true iff the target has a vector optab implementing the operation
2657 CODE on type VECTYPE. */
2659 static bool
2660 target_has_vecop_for_code (tree_code code, tree vectype)
2662 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2663 return voptab
2664 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2667 /* Verify that the target has optabs of VECTYPE to perform all the steps
2668 needed by the multiplication-by-immediate synthesis algorithm described by
2669 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2670 present. Return true iff the target supports all the steps. */
2672 static bool
2673 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2674 tree vectype, bool synth_shift_p)
2676 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2677 return false;
2679 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2680 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2682 if (var == negate_variant
2683 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2684 return false;
2686 /* If we must synthesize shifts with additions make sure that vector
2687 addition is available. */
2688 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2689 return false;
2691 for (int i = 1; i < alg->ops; i++)
2693 switch (alg->op[i])
2695 case alg_shift:
2696 break;
2697 case alg_add_t_m2:
2698 case alg_add_t2_m:
2699 case alg_add_factor:
2700 if (!supports_vplus)
2701 return false;
2702 break;
2703 case alg_sub_t_m2:
2704 case alg_sub_t2_m:
2705 case alg_sub_factor:
2706 if (!supports_vminus)
2707 return false;
2708 break;
2709 case alg_unknown:
2710 case alg_m:
2711 case alg_zero:
2712 case alg_impossible:
2713 return false;
2714 default:
2715 gcc_unreachable ();
2719 return true;
2722 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2723 putting the final result in DEST. Append all statements but the last into
2724 VINFO. Return the last statement. */
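/* For example, a left shift by 3 becomes three self-additions:

     t1 = op + op;        // op << 1
     t2 = t1 + t1;        // op << 2
     dest = t2 + t2;      // op << 3

   with only the last statement returned rather than appended.  */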
2726 static gimple *
2727 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2728 stmt_vec_info vinfo)
2730 HOST_WIDE_INT i;
2731 tree itype = TREE_TYPE (op);
2732 tree prev_res = op;
2733 gcc_assert (amnt >= 0);
2734 for (i = 0; i < amnt; i++)
2736 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2737 : dest;
2738 gimple *stmt
2739 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2740 prev_res = tmp_var;
2741 if (i < amnt - 1)
2742 append_pattern_def_seq (vinfo, stmt);
2743 else
2744 return stmt;
2746 gcc_unreachable ();
2747 return NULL;
2750 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2751 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2752 the process if necessary. Append the resulting assignment statements
2753 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2754 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2755 left shifts using additions. */
2757 static tree
2758 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2759 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2761 if (integer_zerop (op2)
2762 && (code == LSHIFT_EXPR
2763 || code == PLUS_EXPR))
2765 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2766 return op1;
2769 gimple *stmt;
2770 tree itype = TREE_TYPE (op1);
2771 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2773 if (code == LSHIFT_EXPR
2774 && synth_shift_p)
2776 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2777 stmt_vinfo);
2778 append_pattern_def_seq (stmt_vinfo, stmt);
2779 return tmp_var;
2782 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2783 append_pattern_def_seq (stmt_vinfo, stmt);
2784 return tmp_var;
2787 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2788 and simple arithmetic operations to be vectorized. Record the statements
2789 produced in STMT_VINFO and return the last statement in the sequence or
2790 NULL if it's not possible to synthesize such a multiplication.
2791 This function mirrors the behavior of expand_mult_const in expmed.c but
2792 works on tree-ssa form. */
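/* For instance, a multiplication by 9 would typically be synthesized as

     t1 = op << 3;
     result = t1 + op;

   (with the shift itself expanded into additions on targets that lack
   vector shifts).  The exact sequence is whatever choose_mult_variant
   considers cheapest.  */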
2794 static gimple *
2795 vect_synth_mult_by_constant (tree op, tree val,
2796 stmt_vec_info stmt_vinfo)
2798 vec_info *vinfo = stmt_vinfo->vinfo;
2799 tree itype = TREE_TYPE (op);
2800 machine_mode mode = TYPE_MODE (itype);
2801 struct algorithm alg;
2802 mult_variant variant;
2803 if (!tree_fits_shwi_p (val))
2804 return NULL;
2806 /* Multiplication synthesis by shifts, adds and subs can introduce
2807 signed overflow where the original operation didn't. Perform the
2808 operations on an unsigned type and cast back to avoid this.
2809 In the future we may want to relax this for synthesis algorithms
2810 that we can prove do not cause unexpected overflow. */
2811 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2813 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2815 /* Targets that don't support vector shifts but support vector additions
2816 can synthesize shifts that way. */
2817 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2819 HOST_WIDE_INT hwval = tree_to_shwi (val);
2820 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2821 The vectorizer's benefit analysis will decide whether it's beneficial
2822 to do this. */
2823 bool possible = choose_mult_variant (mode, hwval, &alg,
2824 &variant, MAX_COST);
2825 if (!possible)
2826 return NULL;
2828 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2830 if (!vectype
2831 || !target_supports_mult_synth_alg (&alg, variant,
2832 vectype, synth_shift_p))
2833 return NULL;
2835 tree accumulator;
2837 /* Clear out the sequence of statements so we can populate it below. */
2838 gimple *stmt = NULL;
2840 if (cast_to_unsigned_p)
2842 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2843 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2844 append_pattern_def_seq (stmt_vinfo, stmt);
2845 op = tmp_op;
2848 if (alg.op[0] == alg_zero)
2849 accumulator = build_int_cst (multtype, 0);
2850 else
2851 accumulator = op;
2853 bool needs_fixup = (variant == negate_variant)
2854 || (variant == add_variant);
2856 for (int i = 1; i < alg.ops; i++)
2858 tree shft_log = build_int_cst (multtype, alg.log[i]);
2859 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2860 tree tmp_var = NULL_TREE;
2862 switch (alg.op[i])
2864 case alg_shift:
2865 if (synth_shift_p)
2866 stmt
2867 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2868 stmt_vinfo);
2869 else
2870 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2871 shft_log);
2872 break;
2873 case alg_add_t_m2:
2874 tmp_var
2875 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2876 stmt_vinfo, synth_shift_p);
2877 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2878 tmp_var);
2879 break;
2880 case alg_sub_t_m2:
2881 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2882 shft_log, stmt_vinfo,
2883 synth_shift_p);
2884 /* In some algorithms the first step involves zeroing the
2885 accumulator. If subtracting from such an accumulator
2886 just emit the negation directly. */
2887 if (integer_zerop (accumulator))
2888 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2889 else
2890 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2891 tmp_var);
2892 break;
2893 case alg_add_t2_m:
2894 tmp_var
2895 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2896 stmt_vinfo, synth_shift_p);
2897 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2898 break;
2899 case alg_sub_t2_m:
2900 tmp_var
2901 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2902 stmt_vinfo, synth_shift_p);
2903 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2904 break;
2905 case alg_add_factor:
2906 tmp_var
2907 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2908 stmt_vinfo, synth_shift_p);
2909 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2910 tmp_var);
2911 break;
2912 case alg_sub_factor:
2913 tmp_var
2914 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2915 stmt_vinfo, synth_shift_p);
2916 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2917 accumulator);
2918 break;
2919 default:
2920 gcc_unreachable ();
2922 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2923 but rather return it directly. */
2925 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2926 append_pattern_def_seq (stmt_vinfo, stmt);
2927 accumulator = accum_tmp;
2929 if (variant == negate_variant)
2931 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2932 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2933 accumulator = accum_tmp;
2934 if (cast_to_unsigned_p)
2935 append_pattern_def_seq (stmt_vinfo, stmt);
2937 else if (variant == add_variant)
2939 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2940 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2941 accumulator = accum_tmp;
2942 if (cast_to_unsigned_p)
2943 append_pattern_def_seq (stmt_vinfo, stmt);
2945 /* Move back to a signed type if needed. */
2946 if (cast_to_unsigned_p)
2948 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2949 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2952 return stmt;
2955 /* Detect multiplication by constant and convert it into a sequence of
2956 shifts and additions, subtractions, negations. We reuse the
2957 choose_mult_variant algorithms from expmed.c
2959 Input/Output:
2961 STMT_VINFO: The stmt from which the pattern search begins,
2962 i.e. the mult stmt.
2964 Output:
2966 * TYPE_OUT: The type of the output of this pattern.
2968 * Return value: A new stmt that will be used to replace
2969 the multiplication. */
2971 static gimple *
2972 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2974 vec_info *vinfo = stmt_vinfo->vinfo;
2975 gimple *last_stmt = stmt_vinfo->stmt;
2976 tree oprnd0, oprnd1, vectype, itype;
2977 gimple *pattern_stmt;
2979 if (!is_gimple_assign (last_stmt))
2980 return NULL;
2982 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2983 return NULL;
2985 oprnd0 = gimple_assign_rhs1 (last_stmt);
2986 oprnd1 = gimple_assign_rhs2 (last_stmt);
2987 itype = TREE_TYPE (oprnd0);
2989 if (TREE_CODE (oprnd0) != SSA_NAME
2990 || TREE_CODE (oprnd1) != INTEGER_CST
2991 || !INTEGRAL_TYPE_P (itype)
2992 || !type_has_mode_precision_p (itype))
2993 return NULL;
2995 vectype = get_vectype_for_scalar_type (vinfo, itype);
2996 if (vectype == NULL_TREE)
2997 return NULL;
2999 /* If the target can handle vectorized multiplication natively,
3000 don't attempt to optimize this. */
3001 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3002 if (mul_optab != unknown_optab)
3004 machine_mode vec_mode = TYPE_MODE (vectype);
3005 int icode = (int) optab_handler (mul_optab, vec_mode);
3006 if (icode != CODE_FOR_nothing)
3007 return NULL;
3010 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
3011 if (!pattern_stmt)
3012 return NULL;
3014 /* Pattern detected. */
3015 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3017 *type_out = vectype;
3019 return pattern_stmt;
3022 /* Detect a signed division by a constant that wouldn't be
3023 otherwise vectorized:
3025 type a_t, b_t;
3027 S1 a_t = b_t / N;
3029 where type 'type' is an integral type and N is a constant.
3031 Similarly handle modulo by a constant:
3033 S4 a_t = b_t % N;
3035 Input/Output:
3037 * STMT_VINFO: The stmt from which the pattern search begins,
3038 i.e. the division stmt. If N is a power of two constant and the
3039 type is signed, S1 is replaced by:
3040 S3 y_t = b_t < 0 ? N - 1 : 0;
3041 S2 x_t = b_t + y_t;
3042 S1' a_t = x_t >> log2 (N);
3044 Likewise, if N is a power of two constant and the type is signed,
3045 S4 is replaced by (where *_T temporaries have unsigned type):
3046 S9 y_T = b_t < 0 ? -1U : 0U;
3047 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3048 S7 z_t = (type) z_T;
3049 S6 w_t = b_t + z_t;
3050 S5 x_t = w_t & (N - 1);
3051 S4' a_t = x_t - z_t;
3053 Output:
3055 * TYPE_OUT: The type of the output of this pattern.
3057 * Return value: A new stmt that will be used to replace the division
3058 S1 or modulo S4 stmt. */
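/* As a worked example of the division case with N = 4 and b_t = -7:
   y_t = 3 (because b_t < 0), x_t = -7 + 3 = -4 and a_t = -4 >> 2 = -1,
   matching the truncating division -7 / 4 = -1.  For non-negative b_t,
   y_t is 0 and the shift alone gives the result.  */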
3060 static gimple *
3061 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3063 vec_info *vinfo = stmt_vinfo->vinfo;
3064 gimple *last_stmt = stmt_vinfo->stmt;
3065 tree oprnd0, oprnd1, vectype, itype, cond;
3066 gimple *pattern_stmt, *def_stmt;
3067 enum tree_code rhs_code;
3068 optab optab;
3069 tree q;
3070 int dummy_int, prec;
3072 if (!is_gimple_assign (last_stmt))
3073 return NULL;
3075 rhs_code = gimple_assign_rhs_code (last_stmt);
3076 switch (rhs_code)
3078 case TRUNC_DIV_EXPR:
3079 case EXACT_DIV_EXPR:
3080 case TRUNC_MOD_EXPR:
3081 break;
3082 default:
3083 return NULL;
3086 oprnd0 = gimple_assign_rhs1 (last_stmt);
3087 oprnd1 = gimple_assign_rhs2 (last_stmt);
3088 itype = TREE_TYPE (oprnd0);
3089 if (TREE_CODE (oprnd0) != SSA_NAME
3090 || TREE_CODE (oprnd1) != INTEGER_CST
3091 || TREE_CODE (itype) != INTEGER_TYPE
3092 || !type_has_mode_precision_p (itype))
3093 return NULL;
3095 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3096 vectype = get_vectype_for_scalar_type (vinfo, itype);
3097 if (vectype == NULL_TREE)
3098 return NULL;
3100 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3102 /* If the target can handle vectorized division or modulo natively,
3103 don't attempt to optimize this, since native division is likely
3104 to give smaller code. */
3105 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3106 if (optab != unknown_optab)
3108 machine_mode vec_mode = TYPE_MODE (vectype);
3109 int icode = (int) optab_handler (optab, vec_mode);
3110 if (icode != CODE_FOR_nothing)
3111 return NULL;
3115 prec = TYPE_PRECISION (itype);
3116 if (integer_pow2p (oprnd1))
3118 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3119 return NULL;
3121 /* Pattern detected. */
3122 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3124 *type_out = vectype;
3126 /* Check if the target supports this internal function. */
3127 internal_fn ifn = IFN_DIV_POW2;
3128 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3130 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3132 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3133 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3134 gimple_call_set_lhs (div_stmt, var_div);
3136 if (rhs_code == TRUNC_MOD_EXPR)
3138 append_pattern_def_seq (stmt_vinfo, div_stmt);
3139 def_stmt
3140 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3141 LSHIFT_EXPR, var_div, shift);
3142 append_pattern_def_seq (stmt_vinfo, def_stmt);
3143 pattern_stmt
3144 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3145 MINUS_EXPR, oprnd0,
3146 gimple_assign_lhs (def_stmt));
3148 else
3149 pattern_stmt = div_stmt;
3150 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3152 return pattern_stmt;
3155 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3156 build_int_cst (itype, 0));
3157 if (rhs_code == TRUNC_DIV_EXPR
3158 || rhs_code == EXACT_DIV_EXPR)
3160 tree var = vect_recog_temp_ssa_var (itype, NULL);
3161 tree shift;
3162 def_stmt
3163 = gimple_build_assign (var, COND_EXPR, cond,
3164 fold_build2 (MINUS_EXPR, itype, oprnd1,
3165 build_int_cst (itype, 1)),
3166 build_int_cst (itype, 0));
3167 append_pattern_def_seq (stmt_vinfo, def_stmt);
3168 var = vect_recog_temp_ssa_var (itype, NULL);
3169 def_stmt
3170 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3171 gimple_assign_lhs (def_stmt));
3172 append_pattern_def_seq (stmt_vinfo, def_stmt);
3174 shift = build_int_cst (itype, tree_log2 (oprnd1));
3175 pattern_stmt
3176 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3177 RSHIFT_EXPR, var, shift);
3179 else
3181 tree signmask;
3182 if (compare_tree_int (oprnd1, 2) == 0)
3184 signmask = vect_recog_temp_ssa_var (itype, NULL);
3185 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3186 build_int_cst (itype, 1),
3187 build_int_cst (itype, 0));
3188 append_pattern_def_seq (stmt_vinfo, def_stmt);
3190 else
3192 tree utype
3193 = build_nonstandard_integer_type (prec, 1);
3194 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3195 tree shift
3196 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3197 - tree_log2 (oprnd1));
3198 tree var = vect_recog_temp_ssa_var (utype, NULL);
3200 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3201 build_int_cst (utype, -1),
3202 build_int_cst (utype, 0));
3203 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3204 var = vect_recog_temp_ssa_var (utype, NULL);
3205 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3206 gimple_assign_lhs (def_stmt),
3207 shift);
3208 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3209 signmask = vect_recog_temp_ssa_var (itype, NULL);
3210 def_stmt
3211 = gimple_build_assign (signmask, NOP_EXPR, var);
3212 append_pattern_def_seq (stmt_vinfo, def_stmt);
3214 def_stmt
3215 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3216 PLUS_EXPR, oprnd0, signmask);
3217 append_pattern_def_seq (stmt_vinfo, def_stmt);
3218 def_stmt
3219 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3220 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3221 fold_build2 (MINUS_EXPR, itype, oprnd1,
3222 build_int_cst (itype, 1)));
3223 append_pattern_def_seq (stmt_vinfo, def_stmt);
3225 pattern_stmt
3226 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3227 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3228 signmask);
3231 return pattern_stmt;
3234 if (prec > HOST_BITS_PER_WIDE_INT
3235 || integer_zerop (oprnd1))
3236 return NULL;
3238 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3239 return NULL;
3241 if (TYPE_UNSIGNED (itype))
3243 unsigned HOST_WIDE_INT mh, ml;
3244 int pre_shift, post_shift;
3245 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3246 & GET_MODE_MASK (itype_mode));
3247 tree t1, t2, t3, t4;
3249 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3250 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3251 return NULL;
3253 /* Find a suitable multiplier and right shift count
3254 instead of multiplying with D. */
3255 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3257 /* If the suggested multiplier is more than SIZE bits, we can do better
3258 for even divisors, using an initial right shift. */
3259 if (mh != 0 && (d & 1) == 0)
3261 pre_shift = ctz_or_zero (d);
3262 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3263 &ml, &post_shift, &dummy_int);
3264 gcc_assert (!mh);
3266 else
3267 pre_shift = 0;
3269 if (mh != 0)
3271 if (post_shift - 1 >= prec)
3272 return NULL;
3274 /* t1 = oprnd0 h* ml;
3275 t2 = oprnd0 - t1;
3276 t3 = t2 >> 1;
3277 t4 = t1 + t3;
3278 q = t4 >> (post_shift - 1); */
3279 t1 = vect_recog_temp_ssa_var (itype, NULL);
3280 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3281 build_int_cst (itype, ml));
3282 append_pattern_def_seq (stmt_vinfo, def_stmt);
3284 t2 = vect_recog_temp_ssa_var (itype, NULL);
3285 def_stmt
3286 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3287 append_pattern_def_seq (stmt_vinfo, def_stmt);
3289 t3 = vect_recog_temp_ssa_var (itype, NULL);
3290 def_stmt
3291 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3292 append_pattern_def_seq (stmt_vinfo, def_stmt);
3294 t4 = vect_recog_temp_ssa_var (itype, NULL);
3295 def_stmt
3296 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3298 if (post_shift != 1)
3300 append_pattern_def_seq (stmt_vinfo, def_stmt);
3302 q = vect_recog_temp_ssa_var (itype, NULL);
3303 pattern_stmt
3304 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3305 build_int_cst (itype, post_shift - 1));
3307 else
3309 q = t4;
3310 pattern_stmt = def_stmt;
3313 else
3315 if (pre_shift >= prec || post_shift >= prec)
3316 return NULL;
3318 /* t1 = oprnd0 >> pre_shift;
3319 t2 = t1 h* ml;
3320 q = t2 >> post_shift; */
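/* For example, a 32-bit unsigned division by 3 typically uses
   pre_shift = 0, ml = 0xaaaaaaab and post_shift = 1, i.e.
   q = (oprnd0 h* 0xaaaaaaab) >> 1.  */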
3321 if (pre_shift)
3323 t1 = vect_recog_temp_ssa_var (itype, NULL);
3324 def_stmt
3325 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3326 build_int_cst (NULL, pre_shift));
3327 append_pattern_def_seq (stmt_vinfo, def_stmt);
3329 else
3330 t1 = oprnd0;
3332 t2 = vect_recog_temp_ssa_var (itype, NULL);
3333 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3334 build_int_cst (itype, ml));
3336 if (post_shift)
3338 append_pattern_def_seq (stmt_vinfo, def_stmt);
3340 q = vect_recog_temp_ssa_var (itype, NULL);
3341 def_stmt
3342 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3343 build_int_cst (itype, post_shift));
3345 else
3346 q = t2;
3348 pattern_stmt = def_stmt;
3351 else
3353 unsigned HOST_WIDE_INT ml;
3354 int post_shift;
3355 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3356 unsigned HOST_WIDE_INT abs_d;
3357 bool add = false;
3358 tree t1, t2, t3, t4;
3360 /* Give up for -1. */
3361 if (d == -1)
3362 return NULL;
3364 /* Since d might be INT_MIN, we have to cast to
3365 unsigned HOST_WIDE_INT before negating to avoid
3366 undefined signed overflow. */
3367 abs_d = (d >= 0
3368 ? (unsigned HOST_WIDE_INT) d
3369 : - (unsigned HOST_WIDE_INT) d);
3371 /* n rem d = n rem -d */
3372 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3374 d = abs_d;
3375 oprnd1 = build_int_cst (itype, abs_d);
3377 else if (HOST_BITS_PER_WIDE_INT >= prec
3378 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3379 /* This case is not handled correctly below. */
3380 return NULL;
3382 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3383 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3385 add = true;
3386 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3388 if (post_shift >= prec)
3389 return NULL;
3391 /* t1 = oprnd0 h* ml; */
3392 t1 = vect_recog_temp_ssa_var (itype, NULL);
3393 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3394 build_int_cst (itype, ml));
3396 if (add)
3398 /* t2 = t1 + oprnd0; */
3399 append_pattern_def_seq (stmt_vinfo, def_stmt);
3400 t2 = vect_recog_temp_ssa_var (itype, NULL);
3401 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3403 else
3404 t2 = t1;
3406 if (post_shift)
3408 /* t3 = t2 >> post_shift; */
3409 append_pattern_def_seq (stmt_vinfo, def_stmt);
3410 t3 = vect_recog_temp_ssa_var (itype, NULL);
3411 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3412 build_int_cst (itype, post_shift));
3414 else
3415 t3 = t2;
3417 wide_int oprnd0_min, oprnd0_max;
3418 int msb = 1;
3419 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3421 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3422 msb = 0;
3423 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3424 msb = -1;
3427 if (msb == 0 && d >= 0)
3429 /* q = t3; */
3430 q = t3;
3431 pattern_stmt = def_stmt;
3433 else
3435 /* t4 = oprnd0 >> (prec - 1);
3436 or if we know from VRP that oprnd0 >= 0
3437 t4 = 0;
3438 or if we know from VRP that oprnd0 < 0
3439 t4 = -1; */
3440 append_pattern_def_seq (stmt_vinfo, def_stmt);
3441 t4 = vect_recog_temp_ssa_var (itype, NULL);
3442 if (msb != 1)
3443 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3444 build_int_cst (itype, msb));
3445 else
3446 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3447 build_int_cst (itype, prec - 1));
3448 append_pattern_def_seq (stmt_vinfo, def_stmt);
3450 /* q = t3 - t4; or q = t4 - t3; */
3451 q = vect_recog_temp_ssa_var (itype, NULL);
3452 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3453 d < 0 ? t3 : t4);
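/* For example, a 32-bit signed division by 3 typically uses
   ml = 0x55555556 and post_shift = 0, giving
   q = (oprnd0 h* 0x55555556) - (oprnd0 >> 31).  */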
3457 if (rhs_code == TRUNC_MOD_EXPR)
3459 tree r, t1;
3461 /* We divided. Now finish by:
3462 t1 = q * oprnd1;
3463 r = oprnd0 - t1; */
3464 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3466 t1 = vect_recog_temp_ssa_var (itype, NULL);
3467 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3468 append_pattern_def_seq (stmt_vinfo, def_stmt);
3470 r = vect_recog_temp_ssa_var (itype, NULL);
3471 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3474 /* Pattern detected. */
3475 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3477 *type_out = vectype;
3478 return pattern_stmt;
3481 /* Function vect_recog_mixed_size_cond_pattern
3483 Try to find the following pattern:
3485 type x_t, y_t;
3486 TYPE a_T, b_T, c_T;
3487 loop:
3488 S1 a_T = x_t CMP y_t ? b_T : c_T;
3490 where type 'TYPE' is an integral type which has different size
3491 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3492 than 'type', the constants need to fit into an integer type
3493 with the same width as 'type') or results of conversion from 'type'.
3495 Input:
3497 * STMT_VINFO: The stmt from which the pattern search begins.
3499 Output:
3501 * TYPE_OUT: The type of the output of this pattern.
3503 * Return value: A new stmt that will be used to replace the pattern.
3504 Additionally a def_stmt is added.
3506 a_it = x_t CMP y_t ? b_it : c_it;
3507 a_T = (TYPE) a_it; */
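/* For instance (illustrative only), with 'type' == short for the comparison
   operands and 'TYPE' == int for the result,

     int a = x < y ? 3 : 17;       // x, y of type short

   would be rewritten along the lines of

     a_it = x < y ? 3 : 17;        // a_it of a short-sized integer type
     a = (int) a_it;

   so the COND_EXPR is done on vectors with the same number of elements as
   the comparison, and only the final conversion widens the result.  */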
3509 static gimple *
3510 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3512 vec_info *vinfo = stmt_vinfo->vinfo;
3513 gimple *last_stmt = stmt_vinfo->stmt;
3514 tree cond_expr, then_clause, else_clause;
3515 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3516 gimple *pattern_stmt, *def_stmt;
3517 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3518 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3519 bool promotion;
3520 tree comp_scalar_type;
3522 if (!is_gimple_assign (last_stmt)
3523 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3524 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3525 return NULL;
3527 cond_expr = gimple_assign_rhs1 (last_stmt);
3528 then_clause = gimple_assign_rhs2 (last_stmt);
3529 else_clause = gimple_assign_rhs3 (last_stmt);
3531 if (!COMPARISON_CLASS_P (cond_expr))
3532 return NULL;
3534 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3535 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3536 if (comp_vectype == NULL_TREE)
3537 return NULL;
3539 type = gimple_expr_type (last_stmt);
3540 if (types_compatible_p (type, comp_scalar_type)
3541 || ((TREE_CODE (then_clause) != INTEGER_CST
3542 || TREE_CODE (else_clause) != INTEGER_CST)
3543 && !INTEGRAL_TYPE_P (comp_scalar_type))
3544 || !INTEGRAL_TYPE_P (type))
3545 return NULL;
3547 if ((TREE_CODE (then_clause) != INTEGER_CST
3548 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3549 &def_stmt0, &promotion))
3550 || (TREE_CODE (else_clause) != INTEGER_CST
3551 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3552 &def_stmt1, &promotion)))
3553 return NULL;
3555 if (orig_type0 && orig_type1
3556 && !types_compatible_p (orig_type0, orig_type1))
3557 return NULL;
3559 if (orig_type0)
3561 if (!types_compatible_p (orig_type0, comp_scalar_type))
3562 return NULL;
3563 then_clause = gimple_assign_rhs1 (def_stmt0);
3564 itype = orig_type0;
3567 if (orig_type1)
3569 if (!types_compatible_p (orig_type1, comp_scalar_type))
3570 return NULL;
3571 else_clause = gimple_assign_rhs1 (def_stmt1);
3572 itype = orig_type1;
3576 HOST_WIDE_INT cmp_mode_size
3577 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3579 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3580 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3581 return NULL;
3583 vectype = get_vectype_for_scalar_type (vinfo, type);
3584 if (vectype == NULL_TREE)
3585 return NULL;
3587 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3588 return NULL;
3590 if (itype == NULL_TREE)
3591 itype = build_nonstandard_integer_type (cmp_mode_size,
3592 TYPE_UNSIGNED (type));
3594 if (itype == NULL_TREE
3595 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3596 return NULL;
3598 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3599 if (vecitype == NULL_TREE)
3600 return NULL;
3602 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3603 return NULL;
3605 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3607 if ((TREE_CODE (then_clause) == INTEGER_CST
3608 && !int_fits_type_p (then_clause, itype))
3609 || (TREE_CODE (else_clause) == INTEGER_CST
3610 && !int_fits_type_p (else_clause, itype)))
3611 return NULL;
3614 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3615 COND_EXPR, unshare_expr (cond_expr),
3616 fold_convert (itype, then_clause),
3617 fold_convert (itype, else_clause));
3618 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3619 NOP_EXPR, gimple_assign_lhs (def_stmt));
3621 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3622 *type_out = vectype;
3624 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3626 return pattern_stmt;
3630 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3631 true if bool VAR can and should be optimized by this pattern.  Assume it shouldn't
3632 in case it's a result of a comparison which can be directly vectorized into
3633 a vector comparison. Fills in STMTS with all stmts visited during the
3634 walk. */
3636 static bool
3637 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3639 tree rhs1;
3640 enum tree_code rhs_code;
3642 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3643 if (!def_stmt_info)
3644 return false;
3646 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3647 if (!def_stmt)
3648 return false;
3650 if (stmts.contains (def_stmt))
3651 return true;
3653 rhs1 = gimple_assign_rhs1 (def_stmt);
3654 rhs_code = gimple_assign_rhs_code (def_stmt);
3655 switch (rhs_code)
3657 case SSA_NAME:
3658 if (! check_bool_pattern (rhs1, vinfo, stmts))
3659 return false;
3660 break;
3662 CASE_CONVERT:
3663 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3664 return false;
3665 if (! check_bool_pattern (rhs1, vinfo, stmts))
3666 return false;
3667 break;
3669 case BIT_NOT_EXPR:
3670 if (! check_bool_pattern (rhs1, vinfo, stmts))
3671 return false;
3672 break;
3674 case BIT_AND_EXPR:
3675 case BIT_IOR_EXPR:
3676 case BIT_XOR_EXPR:
3677 if (! check_bool_pattern (rhs1, vinfo, stmts)
3678 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3679 return false;
3680 break;
3682 default:
3683 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3685 tree vecitype, comp_vectype;
3687 /* If the comparison can throw, then is_gimple_condexpr will be
3688 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3689 if (stmt_could_throw_p (cfun, def_stmt))
3690 return false;
3692 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3693 if (comp_vectype == NULL_TREE)
3694 return false;
3696 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3697 TREE_TYPE (rhs1));
3698 if (mask_type
3699 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3700 return false;
3702 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3704 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3705 tree itype
3706 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3707 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3708 if (vecitype == NULL_TREE)
3709 return false;
3711 else
3712 vecitype = comp_vectype;
3713 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3714 return false;
3716 else
3717 return false;
3718 break;
3721 bool res = stmts.add (def_stmt);
3722 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3723 gcc_assert (!res);
3725 return true;
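/* For example (an illustrative sketch), given

     bool a = x < y;       // int comparison
     bool b = p != q;      // int comparison
     bool c = a & b;

   a call check_bool_pattern (c, vinfo, stmts) walks the defs of c, a and b
   and, provided none of the comparisons can be vectorized directly into a
   vector comparison producing a mask, records all three def stmts in STMTS
   so that adjust_bool_stmts can rewrite them into integer COND_EXPRs.
   If a direct vector comparison is available it returns false and the bool
   pattern is not used for this chain.  */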
3729 /* Helper function of adjust_bool_pattern.  Cast VAR (an SSA_NAME defined
3730    by a previous stmt) to TYPE by appending a cast stmt to STMT_INFO's
3731    pattern sequence, and return the result of the cast.  */
3733 static tree
3734 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3736 vec_info *vinfo = stmt_info->vinfo;
3737 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3738 NOP_EXPR, var);
3739 append_pattern_def_seq (stmt_info, cast_stmt,
3740 get_vectype_for_scalar_type (vinfo, type));
3741 return gimple_assign_lhs (cast_stmt);
3744 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3745 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3746 type, OUT_TYPE is the desired final integer type of the whole pattern.
3747 STMT_INFO is the info of the pattern root, with which the pattern stmts
3748 should be associated.  DEFS is a map of pattern defs.
3750 static void
3751 adjust_bool_pattern (tree var, tree out_type,
3752 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3754 vec_info *vinfo = stmt_info->vinfo;
3755 gimple *stmt = SSA_NAME_DEF_STMT (var);
3756 enum tree_code rhs_code, def_rhs_code;
3757 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3758 location_t loc;
3759 gimple *pattern_stmt, *def_stmt;
3760 tree trueval = NULL_TREE;
3762 rhs1 = gimple_assign_rhs1 (stmt);
3763 rhs2 = gimple_assign_rhs2 (stmt);
3764 rhs_code = gimple_assign_rhs_code (stmt);
3765 loc = gimple_location (stmt);
3766 switch (rhs_code)
3768 case SSA_NAME:
3769 CASE_CONVERT:
3770 irhs1 = *defs.get (rhs1);
3771 itype = TREE_TYPE (irhs1);
3772 pattern_stmt
3773 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3774 SSA_NAME, irhs1);
3775 break;
3777 case BIT_NOT_EXPR:
3778 irhs1 = *defs.get (rhs1);
3779 itype = TREE_TYPE (irhs1);
3780 pattern_stmt
3781 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3782 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3783 break;
3785 case BIT_AND_EXPR:
3786 /* Try to optimize x = y & (a < b ? 1 : 0); into
3787 x = (a < b ? y : 0);
3789 E.g. for:
3790 bool a_b, b_b, c_b;
3791 TYPE d_T;
3793 S1 a_b = x1 CMP1 y1;
3794 S2 b_b = x2 CMP2 y2;
3795 S3 c_b = a_b & b_b;
3796 S4 d_T = (TYPE) c_b;
3798 we would normally emit:
3800 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3801 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3802 S3' c_T = a_T & b_T;
3803 S4' d_T = c_T;
3805 but we can save one stmt by using the
3806 result of one of the COND_EXPRs in the other COND_EXPR and leave
3807 BIT_AND_EXPR stmt out:
3809 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3810 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3811 S4' d_T = c_T;
3813 At least when VEC_COND_EXPR is implemented using masks
3814 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3815 computes the comparison mask and ANDs it, in one case with an
3816 all-ones vector, in the other case with a vector register.
3817 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3818 often more expensive. */
3819 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3820 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3821 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3823 irhs1 = *defs.get (rhs1);
3824 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3825 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3826 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3828 rhs_code = def_rhs_code;
3829 rhs1 = def_rhs1;
3830 rhs2 = gimple_assign_rhs2 (def_stmt);
3831 trueval = irhs1;
3832 goto do_compare;
3834 else
3835 irhs2 = *defs.get (rhs2);
3836 goto and_ior_xor;
3838 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3839 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3840 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3842 irhs2 = *defs.get (rhs2);
3843 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3844 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3845 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3847 rhs_code = def_rhs_code;
3848 rhs1 = def_rhs1;
3849 rhs2 = gimple_assign_rhs2 (def_stmt);
3850 trueval = irhs2;
3851 goto do_compare;
3853 else
3854 irhs1 = *defs.get (rhs1);
3855 goto and_ior_xor;
3857 /* FALLTHRU */
3858 case BIT_IOR_EXPR:
3859 case BIT_XOR_EXPR:
3860 irhs1 = *defs.get (rhs1);
3861 irhs2 = *defs.get (rhs2);
3862 and_ior_xor:
3863 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3864 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3866 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3867 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3868 int out_prec = TYPE_PRECISION (out_type);
3869 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3870 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3871 stmt_info);
3872 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3873 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3874 stmt_info);
3875 else
3877 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3878 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3881 itype = TREE_TYPE (irhs1);
3882 pattern_stmt
3883 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3884 rhs_code, irhs1, irhs2);
3885 break;
3887 default:
3888 do_compare:
3889 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3890 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3891 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3892 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3893 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3895 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3896 itype
3897 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3899 else
3900 itype = TREE_TYPE (rhs1);
3901 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3902 if (trueval == NULL_TREE)
3903 trueval = build_int_cst (itype, 1);
3904 else
3905 gcc_checking_assert (useless_type_conversion_p (itype,
3906 TREE_TYPE (trueval)));
3907 pattern_stmt
3908 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3909 COND_EXPR, cond_expr, trueval,
3910 build_int_cst (itype, 0));
3911 break;
3914 gimple_set_location (pattern_stmt, loc);
3915 append_pattern_def_seq (stmt_info, pattern_stmt,
3916 get_vectype_for_scalar_type (vinfo, itype));
3917 defs.put (var, gimple_assign_lhs (pattern_stmt));
3920 /* Comparison function to qsort a vector of gimple stmts by UID.  */
3922 static int
3923 sort_after_uid (const void *p1, const void *p2)
3925 const gimple *stmt1 = *(const gimple * const *)p1;
3926 const gimple *stmt2 = *(const gimple * const *)p2;
3927 return gimple_uid (stmt1) - gimple_uid (stmt2);
3930 /* Create pattern stmts for all stmts participating in the bool pattern
3931 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3932 OUT_TYPE. Return the def of the pattern root. */
3934 static tree
3935 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3936 tree out_type, stmt_vec_info stmt_info)
3938 /* Gather original stmts in the bool pattern in their order of appearance
3939 in the IL. */
3940 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3941 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3942 i != bool_stmt_set.end (); ++i)
3943 bool_stmts.quick_push (*i);
3944 bool_stmts.qsort (sort_after_uid);
3946 /* Now process them in that order, producing pattern stmts. */
3947 hash_map <tree, tree> defs;
3948 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3949 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3950 out_type, stmt_info, defs);
3952 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3953 gimple *pattern_stmt
3954 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3955 return gimple_assign_lhs (pattern_stmt);
3958 /* Return the proper type for converting bool VAR into
3959 an integer value or NULL_TREE if no such type exists.
3960 The type is chosen so that the converted value has the
3961 same number of elements as VAR's vector type. */
3963 static tree
3964 integer_type_for_mask (tree var, vec_info *vinfo)
3966 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3967 return NULL_TREE;
3969 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3970 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
3971 return NULL_TREE;
3973 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
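/* E.g. (illustrative) if VAR is a bool defined by a comparison of two ints,
   its mask_precision is typically 32, so the returned type is a 32-bit
   unsigned integer type; the vectorized mask for N elements then converts
   to a vector of N 32-bit integers rather than to a vector of bool-sized
   (byte) elements.  */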
3976 /* Function vect_recog_bool_pattern
3978 Try to find a pattern like the following:
3980 bool a_b, b_b, c_b, d_b, e_b;
3981 TYPE f_T;
3982 loop:
3983 S1 a_b = x1 CMP1 y1;
3984 S2 b_b = x2 CMP2 y2;
3985 S3 c_b = a_b & b_b;
3986 S4 d_b = x3 CMP3 y3;
3987 S5 e_b = c_b | d_b;
3988 S6 f_T = (TYPE) e_b;
3990 where type 'TYPE' is an integral type. Or a similar pattern
3991 ending in
3993 S6 f_Y = e_b ? r_Y : s_Y;
3995 which results from if-conversion of a complex condition.
3997 Input:
3999 * STMT_VINFO: The stmt at the end from which the pattern
4000 search begins, i.e. cast of a bool to
4001 an integer type.
4003 Output:
4005 * TYPE_OUT: The type of the output of this pattern.
4007 * Return value: A new stmt that will be used to replace the pattern.
4009 Assuming the size of TYPE is the same as the size of all comparisons
4010 (otherwise some casts would be added where needed), for the above
4011 sequence we create the following related pattern stmts:
4012 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4013 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4014 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4015 S5' e_T = c_T | d_T;
4016 S6' f_T = e_T;
4018 Instead of the above S3' we could emit:
4019 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4020 S3' c_T = a_T | b_T;
4021 but the above is more efficient. */
4023 static gimple *
4024 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4026 gimple *last_stmt = stmt_vinfo->stmt;
4027 enum tree_code rhs_code;
4028 tree var, lhs, rhs, vectype;
4029 vec_info *vinfo = stmt_vinfo->vinfo;
4030 gimple *pattern_stmt;
4032 if (!is_gimple_assign (last_stmt))
4033 return NULL;
4035 var = gimple_assign_rhs1 (last_stmt);
4036 lhs = gimple_assign_lhs (last_stmt);
4038 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4039 return NULL;
4041 hash_set<gimple *> bool_stmts;
4043 rhs_code = gimple_assign_rhs_code (last_stmt);
4044 if (CONVERT_EXPR_CODE_P (rhs_code))
4046 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4047 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
4048 return NULL;
4049 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4050 if (vectype == NULL_TREE)
4051 return NULL;
4053 if (check_bool_pattern (var, vinfo, bool_stmts))
4055 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
4056 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4057 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4058 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4059 else
4060 pattern_stmt
4061 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4063 else
4065 tree type = integer_type_for_mask (var, vinfo);
4066 tree cst0, cst1, tmp;
4068 if (!type)
4069 return NULL;
4071 /* We may directly use the cond with a narrowed type to avoid
4072 multiple cond exprs with subsequent result packing and instead
4073 perform a single cond with a packed mask.  In case of
4074 widening it is better to emit the cond first and then extract
4075 the results.  */
4076 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4077 type = TREE_TYPE (lhs);
4079 cst0 = build_int_cst (type, 0);
4080 cst1 = build_int_cst (type, 1);
4081 tmp = vect_recog_temp_ssa_var (type, NULL);
4082 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4084 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4086 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4087 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4089 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4090 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4094 *type_out = vectype;
4095 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4097 return pattern_stmt;
4099 else if (rhs_code == COND_EXPR
4100 && TREE_CODE (var) == SSA_NAME)
4102 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4103 if (vectype == NULL_TREE)
4104 return NULL;
4106 /* Build a scalar type for the boolean result that when
4107 vectorized matches the vector type of the result in
4108 size and number of elements. */
4109 unsigned prec
4110 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4111 TYPE_VECTOR_SUBPARTS (vectype));
4113 tree type
4114 = build_nonstandard_integer_type (prec,
4115 TYPE_UNSIGNED (TREE_TYPE (var)));
4116 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4117 return NULL;
4119 if (!check_bool_pattern (var, vinfo, bool_stmts))
4120 return NULL;
4122 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
4124 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4125 pattern_stmt
4126 = gimple_build_assign (lhs, COND_EXPR,
4127 build2 (NE_EXPR, boolean_type_node,
4128 rhs, build_int_cst (type, 0)),
4129 gimple_assign_rhs2 (last_stmt),
4130 gimple_assign_rhs3 (last_stmt));
4131 *type_out = vectype;
4132 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4134 return pattern_stmt;
4136 else if (rhs_code == SSA_NAME
4137 && STMT_VINFO_DATA_REF (stmt_vinfo))
4139 stmt_vec_info pattern_stmt_info;
4140 tree nunits_vectype;
4141 if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype,
4142 &nunits_vectype)
4143 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4144 return NULL;
4146 if (check_bool_pattern (var, vinfo, bool_stmts))
4147 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
4148 else
4150 tree type = integer_type_for_mask (var, vinfo);
4151 tree cst0, cst1, new_vectype;
4153 if (!type)
4154 return NULL;
4156 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4157 type = TREE_TYPE (vectype);
4159 cst0 = build_int_cst (type, 0);
4160 cst1 = build_int_cst (type, 1);
4161 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4163 rhs = vect_recog_temp_ssa_var (type, NULL);
4164 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4165 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4168 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4169 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4171 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4172 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4173 append_pattern_def_seq (stmt_vinfo, cast_stmt);
4174 rhs = rhs2;
4176 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4177 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4178 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4179 *type_out = vectype;
4180 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4182 return pattern_stmt;
4184 else
4185 return NULL;
4189 /* A helper for vect_recog_mask_conversion_pattern. Build
4190 conversion of MASK to a type suitable for masking VECTYPE.
4191 The built statement gets the required vectype and is appended to
4192 STMT_VINFO's pattern sequence.
4194 Return the converted mask.  */
4196 static tree
4197 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4199 gimple *stmt;
4200 tree masktype, tmp;
4202 masktype = truth_type_for (vectype);
4203 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4204 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4205 append_pattern_def_seq (stmt_vinfo, stmt, masktype, TREE_TYPE (vectype));
4207 return tmp;
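/* As an illustrative example, if MASK was computed from a comparison of
   ints but guards a COND_EXPR on doubles, VECTYPE is the double vector
   type and the stmt built here is roughly

     tmp = (truth_type_for (VECTYPE)) mask;

   a CONVERT_EXPR that the vectorizer later expands into the required mask
   packing or unpacking.  */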
4211 /* Function vect_recog_mask_conversion_pattern
4213 Try to find statements which require boolean type
4214 conversion.  Additional conversion statements are
4215 added to handle such cases. For example:
4217 bool m_1, m_2, m_3;
4218 int i_4, i_5;
4219 double d_6, d_7;
4220 char c_1, c_2, c_3;
4222 S1 m_1 = i_4 > i_5;
4223 S2 m_2 = d_6 < d_7;
4224 S3 m_3 = m_1 & m_2;
4225 S4 c_1 = m_3 ? c_2 : c_3;
4227 Will be transformed into:
4229 S1 m_1 = i_4 > i_5;
4230 S2 m_2 = d_6 < d_7;
4231 S3'' m_2' = (_Bool[bitsize=32])m_2
4232 S3' m_3' = m_1 & m_2';
4233 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4234 S4' c_1' = m_3'' ? c_2 : c_3; */
4236 static gimple *
4237 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4239 gimple *last_stmt = stmt_vinfo->stmt;
4240 enum tree_code rhs_code;
4241 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4242 tree vectype1, vectype2;
4243 stmt_vec_info pattern_stmt_info;
4244 vec_info *vinfo = stmt_vinfo->vinfo;
4246 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
4247 if (is_gimple_call (last_stmt)
4248 && gimple_call_internal_p (last_stmt))
4250 gcall *pattern_stmt;
4252 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4253 int mask_argno = internal_fn_mask_index (ifn);
4254 if (mask_argno < 0)
4255 return NULL;
4257 bool store_p = internal_store_fn_p (ifn);
4258 if (store_p)
4260 int rhs_index = internal_fn_stored_value_index (ifn);
4261 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4262 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4264 else
4266 lhs = gimple_call_lhs (last_stmt);
4267 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4270 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4271 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4272 if (!mask_arg_type)
4273 return NULL;
4274 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4276 if (!vectype1 || !vectype2
4277 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4278 TYPE_VECTOR_SUBPARTS (vectype2)))
4279 return NULL;
4281 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
4283 auto_vec<tree, 8> args;
4284 unsigned int nargs = gimple_call_num_args (last_stmt);
4285 args.safe_grow (nargs);
4286 for (unsigned int i = 0; i < nargs; ++i)
4287 args[i] = ((int) i == mask_argno
4288 ? tmp
4289 : gimple_call_arg (last_stmt, i));
4290 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4292 if (!store_p)
4294 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4295 gimple_call_set_lhs (pattern_stmt, lhs);
4297 gimple_call_set_nothrow (pattern_stmt, true);
4299 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4300 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4301 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4303 *type_out = vectype1;
4304 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4306 return pattern_stmt;
4309 if (!is_gimple_assign (last_stmt))
4310 return NULL;
4312 gimple *pattern_stmt;
4313 lhs = gimple_assign_lhs (last_stmt);
4314 rhs1 = gimple_assign_rhs1 (last_stmt);
4315 rhs_code = gimple_assign_rhs_code (last_stmt);
4317 /* Check for cond expression requiring mask conversion. */
4318 if (rhs_code == COND_EXPR)
4320 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4322 if (TREE_CODE (rhs1) == SSA_NAME)
4324 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4325 if (!rhs1_type)
4326 return NULL;
4328 else if (COMPARISON_CLASS_P (rhs1))
4330 /* Check whether we're comparing scalar booleans and (if so)
4331 whether a better mask type exists than the mask associated
4332 with boolean-sized elements. This avoids unnecessary packs
4333 and unpacks if the booleans are set from comparisons of
4334 wider types. E.g. in:
4336 int x1, x2, x3, x4, y1, y2;
4338 bool b1 = (x1 == x2);
4339 bool b2 = (x3 == x4);
4340 ... = b1 == b2 ? y1 : y2;
4342 it is better for b1 and b2 to use the mask type associated
4343 with int elements rather than bool (byte) elements.  */
4344 rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4345 if (!rhs1_type)
4346 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4348 else
4349 return NULL;
4351 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4353 if (!vectype1 || !vectype2)
4354 return NULL;
4356 /* Continue if a conversion is needed. Also continue if we have
4357 a comparison whose vector type would normally be different from
4358 VECTYPE2 when considered in isolation. In that case we'll
4359 replace the comparison with an SSA name (so that we can record
4360 its vector type) and behave as though the comparison was an SSA
4361 name from the outset. */
4362 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4363 TYPE_VECTOR_SUBPARTS (vectype2))
4364 && (TREE_CODE (rhs1) == SSA_NAME
4365 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4366 return NULL;
4368 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4369 in place, we can handle it in vectorizable_condition. This avoids
4370 unnecessary promotion stmts and increased vectorization factor. */
4371 if (COMPARISON_CLASS_P (rhs1)
4372 && INTEGRAL_TYPE_P (rhs1_type)
4373 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4374 TYPE_VECTOR_SUBPARTS (vectype2)))
4376 enum vect_def_type dt;
4377 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4378 && dt == vect_external_def
4379 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4380 && (dt == vect_external_def
4381 || dt == vect_constant_def))
4383 tree wide_scalar_type = build_nonstandard_integer_type
4384 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4385 TYPE_UNSIGNED (rhs1_type));
4386 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4387 wide_scalar_type);
4388 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4389 return NULL;
4393 /* If rhs1 is a comparison we need to move it into a
4394 separate statement. */
4395 if (TREE_CODE (rhs1) != SSA_NAME)
4397 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4398 pattern_stmt = gimple_build_assign (tmp, rhs1);
4399 rhs1 = tmp;
4400 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
4401 rhs1_type);
4404 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4405 TYPE_VECTOR_SUBPARTS (vectype2)))
4406 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4407 else
4408 tmp = rhs1;
4410 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4411 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4412 gimple_assign_rhs2 (last_stmt),
4413 gimple_assign_rhs3 (last_stmt));
4415 *type_out = vectype1;
4416 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4418 return pattern_stmt;
4421 /* Now check for binary boolean operations requiring conversion for
4422 one of operands. */
4423 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4424 return NULL;
4426 if (rhs_code != BIT_IOR_EXPR
4427 && rhs_code != BIT_XOR_EXPR
4428 && rhs_code != BIT_AND_EXPR
4429 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4430 return NULL;
4432 rhs2 = gimple_assign_rhs2 (last_stmt);
4434 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4435 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4437 if (!rhs1_type || !rhs2_type
4438 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4439 return NULL;
4441 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4443 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4444 if (!vectype1)
4445 return NULL;
4446 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4448 else
4450 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4451 if (!vectype1)
4452 return NULL;
4453 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4456 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4457 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4459 *type_out = vectype1;
4460 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4462 return pattern_stmt;
4465 /* STMT_INFO is a load or store. If the load or store is conditional, return
4466 the boolean condition under which it occurs, otherwise return null. */
4468 static tree
4469 vect_get_load_store_mask (stmt_vec_info stmt_info)
4471 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4473 gcc_assert (gimple_assign_single_p (def_assign));
4474 return NULL_TREE;
4477 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4479 internal_fn ifn = gimple_call_internal_fn (def_call);
4480 int mask_index = internal_fn_mask_index (ifn);
4481 return gimple_call_arg (def_call, mask_index);
4484 gcc_unreachable ();
4487 /* Return MASK if MASK is suitable for masking an operation on vectors
4488 of type VECTYPE, otherwise convert it into such a form and return
4489 the result. Associate any conversion statements with STMT_INFO's
4490 pattern. */
4492 static tree
4493 vect_convert_mask_for_vectype (tree mask, tree vectype,
4494 stmt_vec_info stmt_info, vec_info *vinfo)
4496 tree mask_type = integer_type_for_mask (mask, vinfo);
4497 if (mask_type)
4499 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4500 if (mask_vectype
4501 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4502 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4503 mask = build_mask_conversion (mask, vectype, stmt_info);
4505 return mask;
4508 /* Return the equivalent of:
4510 fold_convert (TYPE, VALUE)
4512 with the expectation that the operation will be vectorized.
4513 If new statements are needed, add them as pattern statements
4514 to STMT_INFO. */
4516 static tree
4517 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4519 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4520 return value;
4522 vec_info *vinfo = stmt_info->vinfo;
4523 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4524 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4525 append_pattern_def_seq (stmt_info, conversion,
4526 get_vectype_for_scalar_type (vinfo, type));
4527 return new_value;
4530 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4531 internal function. Return the final statement on success and set
4532 *TYPE_OUT to the vector type being loaded or stored.
4534 This function only handles gathers and scatters that were recognized
4535 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4537 static gimple *
4538 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4540 /* Currently we only support this for loop vectorization. */
4541 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4542 if (!loop_vinfo)
4543 return NULL;
4545 /* Make sure that we're looking at a gather load or scatter store. */
4546 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4547 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4548 return NULL;
4550 /* Get the boolean that controls whether the load or store happens.
4551 This is null if the operation is unconditional. */
4552 tree mask = vect_get_load_store_mask (stmt_info);
4554 /* Make sure that the target supports an appropriate internal
4555 function for the gather/scatter operation. */
4556 gather_scatter_info gs_info;
4557 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4558 || gs_info.decl)
4559 return NULL;
4561 /* Convert the mask to the right form. */
4562 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4563 gs_info.element_type);
4564 if (mask)
4565 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4566 loop_vinfo);
4568 /* Get the invariant base and non-invariant offset, converting the
4569 latter to the same width as the vector elements. */
4570 tree base = gs_info.base;
4571 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4572 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4573 stmt_info);
4575 /* Build the new pattern statement. */
4576 tree scale = size_int (gs_info.scale);
4577 gcall *pattern_stmt;
4578 if (DR_IS_READ (dr))
4580 tree zero = build_zero_cst (gs_info.element_type);
4581 if (mask != NULL)
4582 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4583 offset, scale, zero, mask);
4584 else
4585 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4586 offset, scale, zero);
4587 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4588 gimple_call_set_lhs (pattern_stmt, load_lhs);
4590 else
4592 tree rhs = vect_get_store_rhs (stmt_info);
4593 if (mask != NULL)
4594 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4595 base, offset, scale, rhs,
4596 mask);
4597 else
4598 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4599 base, offset, scale, rhs);
4601 gimple_call_set_nothrow (pattern_stmt, true);
4603 /* Copy across relevant vectorization info and associate DR with the
4604 new pattern statement instead of the original statement. */
4605 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4606 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4608 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4609 *type_out = vectype;
4610 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4612 return pattern_stmt;
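/* As a rough illustration, an if-converted conditional gather such as

     x = cond_i ? b[idx[i]] : 0.0;

   ends up as an internal-function call along the lines of

     x = .MASK_GATHER_LOAD (&b[0], idx_i, scale, 0.0, mask_i);

   with the offset first converted to the element width the target's
   gather expects, while an unconditional scatter store becomes
   .SCATTER_STORE (base, offset, scale, rhs).  */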
4615 /* Return true if TYPE is a non-boolean integer type. These are the types
4616 that we want to consider for narrowing. */
4618 static bool
4619 vect_narrowable_type_p (tree type)
4621 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4624 /* Return true if the operation given by CODE can be truncated to N bits
4625 when only N bits of the output are needed. This is only true if bit N+1
4626 of the inputs has no effect on the low N bits of the result. */
4628 static bool
4629 vect_truncatable_operation_p (tree_code code)
4631 switch (code)
4633 case PLUS_EXPR:
4634 case MINUS_EXPR:
4635 case MULT_EXPR:
4636 case BIT_AND_EXPR:
4637 case BIT_IOR_EXPR:
4638 case BIT_XOR_EXPR:
4639 case COND_EXPR:
4640 return true;
4642 default:
4643 return false;
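/* For example, bit 8 of the inputs has no effect on the low 8 bits of
   a + b, so PLUS_EXPR is truncatable, whereas the low 8 bits of a division
   do depend on the upper input bits (0x100 / 3 == 0x55 but 0x000 / 3 == 0),
   so TRUNC_DIV_EXPR is not in the list above.  */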
4647 /* Record that STMT_INFO could be changed from operating on TYPE to
4648 operating on a type with the precision and sign given by PRECISION
4649 and SIGN respectively. PRECISION is an arbitrary bit precision;
4650 it might not be a whole number of bytes. */
4652 static void
4653 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4654 unsigned int precision, signop sign)
4656 /* Round the precision up to a whole number of bytes. */
4657 precision = vect_element_precision (precision);
4658 if (precision < TYPE_PRECISION (type)
4659 && (!stmt_info->operation_precision
4660 || stmt_info->operation_precision > precision))
4662 stmt_info->operation_precision = precision;
4663 stmt_info->operation_sign = sign;
4667 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4668 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4669 is an arbitrary bit precision; it might not be a whole number of bytes. */
4671 static void
4672 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4673 unsigned int min_input_precision)
4675 /* This operation in isolation only requires the inputs to have
4676 MIN_INPUT_PRECISION of precision.  However, that doesn't mean
4677 that MIN_INPUT_PRECISION is a natural precision for the chain
4678 as a whole. E.g. consider something like:
4680 unsigned short *x, *y;
4681 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4683 The right shift can be done on unsigned chars, and only requires the
4684 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4685 approach would mean turning a natural chain of single-vector unsigned
4686 short operations into one that truncates "*x" and then extends
4687 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4688 operation and one vector for each unsigned char operation.
4689 This would be a significant pessimization.
4691 Instead only propagate the maximum of this precision and the precision
4692 required by the users of the result. This means that we don't pessimize
4693 the case above but continue to optimize things like:
4695 unsigned char *y;
4696 unsigned short *x;
4697 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4699 Here we would truncate two vectors of *x to a single vector of
4700 unsigned chars and use single-vector unsigned char operations for
4701 everything else, rather than doing two unsigned short copies of
4702 "(*x & 0xf0) >> 4" and then truncating the result. */
4703 min_input_precision = MAX (min_input_precision,
4704 stmt_info->min_output_precision);
4706 if (min_input_precision < TYPE_PRECISION (type)
4707 && (!stmt_info->min_input_precision
4708 || stmt_info->min_input_precision > min_input_precision))
4709 stmt_info->min_input_precision = min_input_precision;
4712 /* Subroutine of vect_determine_min_output_precision. Return true if
4713 we can calculate a reduced number of output bits for STMT_INFO,
4714 whose result is LHS. */
4716 static bool
4717 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4719 /* Take the maximum precision required by users of the result. */
4720 vec_info *vinfo = stmt_info->vinfo;
4721 unsigned int precision = 0;
4722 imm_use_iterator iter;
4723 use_operand_p use;
4724 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4726 gimple *use_stmt = USE_STMT (use);
4727 if (is_gimple_debug (use_stmt))
4728 continue;
4729 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4730 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4731 return false;
4732 /* The input precision recorded for COND_EXPRs applies only to the
4733 "then" and "else" values. */
4734 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4735 if (assign
4736 && gimple_assign_rhs_code (assign) == COND_EXPR
4737 && use->use != gimple_assign_rhs2_ptr (assign)
4738 && use->use != gimple_assign_rhs3_ptr (assign))
4739 return false;
4740 precision = MAX (precision, use_stmt_info->min_input_precision);
4743 if (dump_enabled_p ())
4744 dump_printf_loc (MSG_NOTE, vect_location,
4745 "only the low %d bits of %T are significant\n",
4746 precision, lhs);
4747 stmt_info->min_output_precision = precision;
4748 return true;
4751 /* Calculate min_output_precision for STMT_INFO. */
4753 static void
4754 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4756 /* We're only interested in statements with a narrowable result. */
4757 tree lhs = gimple_get_lhs (stmt_info->stmt);
4758 if (!lhs
4759 || TREE_CODE (lhs) != SSA_NAME
4760 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4761 return;
4763 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4764 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4767 /* Use range information to decide whether STMT (described by STMT_INFO)
4768 could be done in a narrower type. This is effectively a forward
4769 propagation, since it uses context-independent information that applies
4770 to all users of an SSA name. */
4772 static void
4773 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4775 tree lhs = gimple_assign_lhs (stmt);
4776 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4777 return;
4779 tree type = TREE_TYPE (lhs);
4780 if (!vect_narrowable_type_p (type))
4781 return;
4783 /* First see whether we have any useful range information for the result. */
4784 unsigned int precision = TYPE_PRECISION (type);
4785 signop sign = TYPE_SIGN (type);
4786 wide_int min_value, max_value;
4787 if (!vect_get_range_info (lhs, &min_value, &max_value))
4788 return;
4790 tree_code code = gimple_assign_rhs_code (stmt);
4791 unsigned int nops = gimple_num_ops (stmt);
4793 if (!vect_truncatable_operation_p (code))
4794 /* Check that all relevant input operands are compatible, and update
4795 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4796 for (unsigned int i = 1; i < nops; ++i)
4798 tree op = gimple_op (stmt, i);
4799 if (TREE_CODE (op) == INTEGER_CST)
4801 /* Don't require the integer to have RHS_TYPE (which it might
4802 not for things like shift amounts, etc.), but do require it
4803 to fit the type. */
4804 if (!int_fits_type_p (op, type))
4805 return;
4807 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4808 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4810 else if (TREE_CODE (op) == SSA_NAME)
4812 /* Ignore codes that don't take uniform arguments. */
4813 if (!types_compatible_p (TREE_TYPE (op), type))
4814 return;
4816 wide_int op_min_value, op_max_value;
4817 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4818 return;
4820 min_value = wi::min (min_value, op_min_value, sign);
4821 max_value = wi::max (max_value, op_max_value, sign);
4823 else
4824 return;
4827 /* Try to switch signed types for unsigned types if we can.
4828 This is better for two reasons. First, unsigned ops tend
4829 to be cheaper than signed ops. Second, it means that we can
4830 handle things like:
4832 signed char c;
4833 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4837 signed char c;
4838 unsigned short res_1 = (unsigned short) c & 0xff00;
4839 int res = (int) res_1;
4841 where the intermediate result res_1 has unsigned rather than
4842 signed type. */
4843 if (sign == SIGNED && !wi::neg_p (min_value))
4844 sign = UNSIGNED;
4846 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4847 unsigned int precision1 = wi::min_precision (min_value, sign);
4848 unsigned int precision2 = wi::min_precision (max_value, sign);
4849 unsigned int value_precision = MAX (precision1, precision2);
4850 if (value_precision >= precision)
4851 return;
4853 if (dump_enabled_p ())
4854 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4855 " without loss of precision: %G",
4856 sign == SIGNED ? "signed" : "unsigned",
4857 value_precision, stmt);
4859 vect_set_operation_type (stmt_info, type, value_precision, sign);
4860 vect_set_min_input_precision (stmt_info, type, value_precision);
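/* E.g. (illustrative) if range info shows that the int result of
   "x = a + b" is always in [0, 200], value_precision is 8 and SIGN becomes
   UNSIGNED, so the addition is recorded as performable on unsigned chars;
   the over-widening pattern can then use this to narrow the vectorized
   operation.  */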
4863 /* Use information about the users of STMT's result to decide whether
4864 STMT (described by STMT_INFO) could be done in a narrower type.
4865 This is effectively a backward propagation. */
4867 static void
4868 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4870 tree_code code = gimple_assign_rhs_code (stmt);
4871 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4872 tree type = TREE_TYPE (gimple_op (stmt, opno));
4873 if (!vect_narrowable_type_p (type))
4874 return;
4876 unsigned int precision = TYPE_PRECISION (type);
4877 unsigned int operation_precision, min_input_precision;
4878 switch (code)
4880 CASE_CONVERT:
4881 /* Only the bits that contribute to the output matter. Don't change
4882 the precision of the operation itself. */
4883 operation_precision = precision;
4884 min_input_precision = stmt_info->min_output_precision;
4885 break;
4887 case LSHIFT_EXPR:
4888 case RSHIFT_EXPR:
4890 tree shift = gimple_assign_rhs2 (stmt);
4891 if (TREE_CODE (shift) != INTEGER_CST
4892 || !wi::ltu_p (wi::to_widest (shift), precision))
4893 return;
4894 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4895 if (code == LSHIFT_EXPR)
4897 /* We need CONST_SHIFT fewer bits of the input. */
4898 operation_precision = stmt_info->min_output_precision;
4899 min_input_precision = (MAX (operation_precision, const_shift)
4900 - const_shift);
4902 else
4904 /* We need CONST_SHIFT extra bits to do the operation. */
4905 operation_precision = (stmt_info->min_output_precision
4906 + const_shift);
4907 min_input_precision = operation_precision;
4909 break;
4912 default:
4913 if (vect_truncatable_operation_p (code))
4915 /* Input bit N has no effect on output bits N-1 and lower. */
4916 operation_precision = stmt_info->min_output_precision;
4917 min_input_precision = operation_precision;
4918 break;
4920 return;
4923 if (operation_precision < precision)
4925 if (dump_enabled_p ())
4926 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4927 " without affecting users: %G",
4928 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4929 operation_precision, stmt);
4930 vect_set_operation_type (stmt_info, type, operation_precision,
4931 TYPE_SIGN (type));
4933 vect_set_min_input_precision (stmt_info, type, min_input_precision);
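/* E.g. (illustrative) if only the low 8 bits of the result are needed:
   for "x >> 3" the operation must be carried out in at least 8 + 3 == 11
   bits and its input needs 11 bits, while for "x << 3" the operation needs
   only 8 bits and its input just MAX (8, 3) - 3 == 5 bits.  */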
4936 /* Return true if the statement described by STMT_INFO sets a boolean
4937 SSA_NAME and if we know how to vectorize this kind of statement using
4938 vector mask types. */
4940 static bool
4941 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
4943 tree lhs = gimple_get_lhs (stmt_info->stmt);
4944 if (!lhs
4945 || TREE_CODE (lhs) != SSA_NAME
4946 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4947 return false;
4949 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
4951 tree_code rhs_code = gimple_assign_rhs_code (assign);
4952 switch (rhs_code)
4954 CASE_CONVERT:
4955 case SSA_NAME:
4956 case BIT_NOT_EXPR:
4957 case BIT_IOR_EXPR:
4958 case BIT_XOR_EXPR:
4959 case BIT_AND_EXPR:
4960 return true;
4962 default:
4963 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
4966 return false;
4969 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
4970 a vector mask type instead of a normal vector type. Record the
4971 result in STMT_INFO->mask_precision. */
4973 static void
4974 vect_determine_mask_precision (stmt_vec_info stmt_info)
4976 vec_info *vinfo = stmt_info->vinfo;
4978 if (!possible_vector_mask_operation_p (stmt_info)
4979 || stmt_info->mask_precision)
4980 return;
4982 auto_vec<stmt_vec_info, 32> worklist;
4983 worklist.quick_push (stmt_info);
4984 while (!worklist.is_empty ())
4986 stmt_info = worklist.last ();
4987 unsigned int orig_length = worklist.length ();
4989 /* If at least one boolean input uses a vector mask type,
4990 pick the mask type with the narrowest elements.
4992 ??? This is the traditional behavior. It should always produce
4993 the smallest number of operations, but isn't necessarily the
4994 optimal choice. For example, if we have:
4996 a = b & c
4998 where:
5000 - the user of a wants it to have a mask type for 16-bit elements (M16)
5001 - b also uses M16
5002 - c uses a mask type for 8-bit elements (M8)
5004 then picking M8 gives:
5006 - 1 M16->M8 pack for b
5007 - 1 M8 AND for a
5008 - 2 M8->M16 unpacks for the user of a
5010 whereas picking M16 would have given:
5012 - 2 M8->M16 unpacks for c
5013 - 2 M16 ANDs for a
5015 The number of operations are equal, but M16 would have given
5016 a shorter dependency chain and allowed more ILP. */
5017 unsigned int precision = ~0U;
5018 gassign *assign = as_a <gassign *> (stmt_info->stmt);
5019 unsigned int nops = gimple_num_ops (assign);
5020 for (unsigned int i = 1; i < nops; ++i)
5022 tree rhs = gimple_op (assign, i);
5023 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5024 continue;
5026 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5027 if (!def_stmt_info)
5028 /* Don't let external or constant operands influence the choice.
5029 We can convert them to whichever vector type we pick. */
5030 continue;
5032 if (def_stmt_info->mask_precision)
5034 if (precision > def_stmt_info->mask_precision)
5035 precision = def_stmt_info->mask_precision;
5037 else if (possible_vector_mask_operation_p (def_stmt_info))
5038 worklist.safe_push (def_stmt_info);
5041 /* Defer the choice if we need to visit operands first. */
5042 if (orig_length != worklist.length ())
5043 continue;
5045 /* If the statement compares two values that shouldn't use vector masks,
5046 try comparing the values as normal scalars instead. */
5047 tree_code rhs_code = gimple_assign_rhs_code (assign);
5048 if (precision == ~0U
5049 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5051 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5052 scalar_mode mode;
5053 tree vectype, mask_type;
5054 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5055 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5056 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5057 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5058 precision = GET_MODE_BITSIZE (mode);
5061 if (dump_enabled_p ())
5063 if (precision == ~0U)
5064 dump_printf_loc (MSG_NOTE, vect_location,
5065 "using normal nonmask vectors for %G",
5066 stmt_info->stmt);
5067 else
5068 dump_printf_loc (MSG_NOTE, vect_location,
5069 "using boolean precision %d for %G",
5070 precision, stmt_info->stmt);
5073 stmt_info->mask_precision = precision;
5074 worklist.pop ();
5078 /* Handle vect_determine_precisions for STMT_INFO, given that we
5079 have already done so for the users of its result. */
5081 void
5082 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
5084 vect_determine_min_output_precision (stmt_info);
5085 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5087 vect_determine_precisions_from_range (stmt_info, stmt);
5088 vect_determine_precisions_from_users (stmt_info, stmt);
5090 vect_determine_mask_precision (stmt_info);
5093 /* Walk backwards through the vectorizable region to determine the
5094 values of these fields:
5096 - min_output_precision
5097 - min_input_precision
5098 - operation_precision
5099 - operation_sign. */
5101 void
5102 vect_determine_precisions (vec_info *vinfo)
5104 DUMP_VECT_SCOPE ("vect_determine_precisions");
5106 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5108 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5109 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5110 unsigned int nbbs = loop->num_nodes;
5112 for (unsigned int i = 0; i < nbbs; i++)
5114 basic_block bb = bbs[nbbs - i - 1];
5115 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5116 !gsi_end_p (si); gsi_prev (&si))
5117 vect_determine_stmt_precisions
5118 (vinfo->lookup_stmt (gsi_stmt (si)));
5121 else
5123 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5124 gimple_stmt_iterator si = bb_vinfo->region_end;
5125 gimple *stmt;
5128 if (!gsi_stmt (si))
5129 si = gsi_last_bb (bb_vinfo->bb);
5130 else
5131 gsi_prev (&si);
5132 stmt = gsi_stmt (si);
5133 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
5134 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5135 vect_determine_stmt_precisions (stmt_info);
5137 while (stmt != gsi_stmt (bb_vinfo->region_begin));
5141 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
5143 struct vect_recog_func
5145 vect_recog_func_ptr fn;
5146 const char *name;
5149 /* Note that ordering matters - the first pattern matching on a stmt is
5150 taken, which means usually the more complex patterns need to precede the
5151 less complex ones (widen_sum only after dot_prod or sad for example).
5152 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5153 { vect_recog_over_widening_pattern, "over_widening" },
5154 /* Must come after over_widening, which narrows the shift as much as
5155 possible beforehand. */
5156 { vect_recog_average_pattern, "average" },
5157 { vect_recog_mulhs_pattern, "mult_high" },
5158 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5159 { vect_recog_widen_mult_pattern, "widen_mult" },
5160 { vect_recog_dot_prod_pattern, "dot_prod" },
5161 { vect_recog_sad_pattern, "sad" },
5162 { vect_recog_widen_sum_pattern, "widen_sum" },
5163 { vect_recog_pow_pattern, "pow" },
5164 { vect_recog_widen_shift_pattern, "widen_shift" },
5165 { vect_recog_rotate_pattern, "rotate" },
5166 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5167 { vect_recog_divmod_pattern, "divmod" },
5168 { vect_recog_mult_pattern, "mult" },
5169 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5170 { vect_recog_bool_pattern, "bool" },
5171 /* This must come before mask conversion, and includes the parts
5172 of mask conversion that are needed for gather and scatter
5173 internal functions. */
5174 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5175 { vect_recog_mask_conversion_pattern, "mask_conversion" }
5178 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5180 /* Mark statements that are involved in a pattern. */
5182 static inline void
5183 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5184 tree pattern_vectype)
5186 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5187 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5189 gimple *orig_pattern_stmt = NULL;
5190 if (is_pattern_stmt_p (orig_stmt_info))
5192 /* We're replacing a statement in an existing pattern definition
5193 sequence. */
5194 orig_pattern_stmt = orig_stmt_info->stmt;
5195 if (dump_enabled_p ())
5196 dump_printf_loc (MSG_NOTE, vect_location,
5197 "replacing earlier pattern %G", orig_pattern_stmt);
5199 /* To keep the book-keeping simple, just swap the lhs of the
5200 old and new statements, so that the old one has a valid but
5201 unused lhs. */
5202 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5203 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5204 gimple_set_lhs (pattern_stmt, old_lhs);
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5209 /* Switch to the statement that ORIG replaces. */
5210 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5212 /* We shouldn't be replacing the main pattern statement. */
5213 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5214 != orig_pattern_stmt);
5217 if (def_seq)
5218 for (gimple_stmt_iterator si = gsi_start (def_seq);
5219 !gsi_end_p (si); gsi_next (&si))
5221 if (dump_enabled_p ())
5222 dump_printf_loc (MSG_NOTE, vect_location,
5223 "extra pattern stmt: %G", gsi_stmt (si));
5224 stmt_vec_info pattern_stmt_info
5225 = vect_init_pattern_stmt (gsi_stmt (si),
5226 orig_stmt_info, pattern_vectype);
5227 /* Stmts in the def sequence are not vectorizable cycle or
5228 induction defs, instead they should all be vect_internal_def
5229 feeding the main pattern stmt which retains this def type. */
5230 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5233 if (orig_pattern_stmt)
5235 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5237 /* Insert all the new pattern statements before the original one. */
5238 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5239 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5240 orig_def_seq);
5241 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5242 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5244 /* Remove the pattern statement that this new pattern replaces. */
5245 gsi_remove (&gsi, false);
5247 else
5248 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5250 /* Transfer reduction path info to the pattern. */
5251 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5253 vec_info *vinfo = orig_stmt_info_saved->vinfo;
5254 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5255 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5256 /* Search the pattern def sequence and the main pattern stmt. Note
5257 we may have inserted all into a containing pattern def sequence
5258 so the following is a bit awkward. */
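/* As a worked example with hypothetical stmts: if the original stmt was
   "sum_3 = a_5 + sum_2" with reduction index 1 (sum_2), and the pattern
   consists of the def-seq stmt "t_6 = (T) a_5" followed by the main stmt
   "patt_7 = t_6 + sum_2", the loop below skips over the def-seq stmt,
   finds sum_2 as operand 2 of the main stmt and so records reduction
   index 1 for it.  */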
5259 gimple_stmt_iterator si;
5260 gimple *s;
5261 if (def_seq)
5262 {
5263 si = gsi_start (def_seq);
5264 s = gsi_stmt (si);
5265 gsi_next (&si);
5266 }
5267 else
5268 {
5269 si = gsi_none ();
5270 s = pattern_stmt;
5271 }
5272 do
5273 {
5274 bool found = false;
5275 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5276 if (gimple_op (s, i) == lookfor)
5277 {
5278 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5279 lookfor = gimple_get_lhs (s);
5280 found = true;
5281 break;
5282 }
5283 if (s == pattern_stmt)
5284 {
5285 if (!found && dump_enabled_p ())
5286 dump_printf_loc (MSG_NOTE, vect_location,
5287 "failed to update reduction index.\n");
5288 break;
5289 }
5290 if (gsi_end_p (si))
5291 s = pattern_stmt;
5292 else
5293 {
5294 s = gsi_stmt (si);
5295 if (s == pattern_stmt)
5296 /* Found the end inside a bigger pattern def seq. */
5297 si = gsi_none ();
5298 else
5299 gsi_next (&si);
5300 }
5301 } while (1);
5302 }
5303 }
5305 /* Function vect_pattern_recog_1
5307 Input:
5308 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5309 computation pattern.
5310 STMT_INFO: A stmt from which the pattern search should start.
5312 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5313 a sequence of statements that has the same functionality and can be
5314 used to replace STMT_INFO. It returns the last statement in the sequence
5315 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5316 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5317 statement, having first checked that the target supports the new operation
5318 in that type.
5320 This function also does some bookkeeping, as explained in the documentation
5321 for vect_pattern_recog. */
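/* As a minimal sketch of that contract (vect_recog_foo_pattern is a
   hypothetical name, not one of the recognizers in the table above):

     static gimple *
     vect_recog_foo_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
     {
       if (... STMT_VINFO doesn't match the idiom, or the target lacks
	   support for the replacement in a suitable vector type ...)
	 return NULL;
       *type_out = vectype_of_new_stmt;
       return new_pattern_stmt;
     }  */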
5323 static void
5324 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
5325 {
5326 vec_info *vinfo = stmt_info->vinfo;
5327 gimple *pattern_stmt;
5328 loop_vec_info loop_vinfo;
5329 tree pattern_vectype;
5331 /* If this statement has already been replaced with pattern statements,
5332 leave the original statement alone, since the first match wins.
5333 Instead try to match against the definition statements that feed
5334 the main pattern statement. */
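/* For example, if STMT_INFO was already replaced by a pattern whose
   definition sequence contains D1 and D2, the recursion below tries
   RECOG_FUNC on D1 and on D2 instead of on the original statement.  */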
5335 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5336 {
5337 gimple_stmt_iterator gsi;
5338 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5339 !gsi_end_p (gsi); gsi_next (&gsi))
5340 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
5341 return;
5342 }
5344 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5345 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
5346 if (!pattern_stmt)
5347 {
5348 /* Clear any half-formed pattern definition sequence. */
5349 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5350 return;
5351 }
5353 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5354 gcc_assert (pattern_vectype);
5356 /* Found a vectorizable pattern. */
5357 if (dump_enabled_p ())
5358 dump_printf_loc (MSG_NOTE, vect_location,
5359 "%s pattern recognized: %G",
5360 recog_func->name, pattern_stmt);
5362 /* Mark the stmts that are involved in the pattern. */
5363 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
5365 /* Patterns cannot be vectorized using SLP, because they change the order of
5366 computation. */
5367 if (loop_vinfo)
5368 {
5369 unsigned ix, ix2;
5370 stmt_vec_info *elem_ptr;
5371 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5372 elem_ptr, *elem_ptr == stmt_info);
5373 }
5374 }
5377 /* Function vect_pattern_recog
5379 Input:
5380 VINFO - a vec_info (either a loop_vec_info or a bb_vec_info) describing
5381 the region in which we want to look for computation idioms.
5383 Output - for each computation idiom that is detected we create a new stmt
5384 that provides the same functionality and that can be vectorized. We
5385 also record some information in the stmt_vec_info of the relevant
5386 stmts, as explained below:
5388 At the entry to this function we have the following stmts, with the
5389 following initial value in the STMT_VINFO fields:
5391 stmt in_pattern_p related_stmt vec_stmt
5392 S1: a_i = .... - - -
5393 S2: a_2 = ..use(a_i).. - - -
5394 S3: a_1 = ..use(a_2).. - - -
5395 S4: a_0 = ..use(a_1).. - - -
5396 S5: ... = ..use(a_0).. - - -
5398 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5399 represented by a single stmt. We then:
5400 - create a new stmt S6 equivalent to the pattern (the stmt is not
5401 inserted into the code)
5402 - fill in the STMT_VINFO fields as follows:
5404 in_pattern_p related_stmt vec_stmt
5405 S1: a_i = .... - - -
5406 S2: a_2 = ..use(a_i).. - - -
5407 S3: a_1 = ..use(a_2).. - - -
5408 S4: a_0 = ..use(a_1).. true S6 -
5409 '---> S6: a_new = .... - S4 -
5410 S5: ... = ..use(a_0).. - - -
5412 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5413 to each other through the RELATED_STMT field).
5415 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5416 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5417 remain irrelevant unless used by stmts other than S4.
5419 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5420 (because they are marked as irrelevant). It will vectorize S6, and record
5421 a pointer to the new vector stmt VS6 from S6 (as usual).
5422 S4 will be skipped, and S5 will be vectorized as usual:
5424 in_pattern_p related_stmt vec_stmt
5425 S1: a_i = .... - - -
5426 S2: a_2 = ..use(a_i).. - - -
5427 S3: a_1 = ..use(a_2).. - - -
5428 > VS6: va_new = .... - - -
5429 S4: a_0 = ..use(a_1).. true S6 VS6
5430 '---> S6: a_new = .... - S4 VS6
5431 > VS5: ... = ..vuse(va_new).. - - -
5432 S5: ... = ..use(a_0).. - - -
5434 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5435 elsewhere), and we'll end up with:
5437 VS6: va_new = ....
5438 VS5: ... = ..vuse(va_new)..
5440 In case the pattern is represented by more than one statement, e.g., widen-mult
5441 with an intermediate type:
5443 S1 a_t = ;
5444 S2 a_T = (TYPE) a_t;
5445 '--> S3: a_it = (interm_type) a_t;
5446 S4 prod_T = a_T * CONST;
5447 '--> S5: prod_T' = a_it w* CONST;
5449 there may be other users of a_T outside the pattern. In that case S2 will
5450 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5451 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5452 be recorded in S3. */
5454 void
5455 vect_pattern_recog (vec_info *vinfo)
5456 {
5457 class loop *loop;
5458 basic_block *bbs;
5459 unsigned int nbbs;
5460 gimple_stmt_iterator si;
5461 unsigned int i, j;
5463 vect_determine_precisions (vinfo);
5465 DUMP_VECT_SCOPE ("vect_pattern_recog");
5467 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5468 {
5469 loop = LOOP_VINFO_LOOP (loop_vinfo);
5470 bbs = LOOP_VINFO_BBS (loop_vinfo);
5471 nbbs = loop->num_nodes;
5473 /* Scan through the loop stmts, applying the pattern recognition
5474 functions starting at each stmt visited: */
5475 for (i = 0; i < nbbs; i++)
5476 {
5477 basic_block bb = bbs[i];
5478 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5479 {
5480 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5481 /* Scan over all generic vect_recog_xxx_pattern functions. */
5482 for (j = 0; j < NUM_PATTERNS; j++)
5483 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
5484 stmt_info);
5485 }
5486 }
5487 }
5488 else
5489 {
5490 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5491 for (si = bb_vinfo->region_begin;
5492 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5493 {
5494 gimple *stmt = gsi_stmt (si);
5495 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
5496 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5497 continue;
5499 /* Scan over all generic vect_recog_xxx_pattern functions. */
5500 for (j = 0; j < NUM_PATTERNS; j++)
5501 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);