gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
58 value_range_kind vr_type = get_range_info (var, min_value, max_value);
59 wide_int nonzero = get_nonzero_bits (var);
60 signop sgn = TYPE_SIGN (TREE_TYPE (var));
61 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 nonzero, sgn) == VR_RANGE)
64 if (dump_enabled_p ())
66 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 dump_printf (MSG_NOTE, " has range [");
68 dump_hex (MSG_NOTE, *min_value);
69 dump_printf (MSG_NOTE, ", ");
70 dump_hex (MSG_NOTE, *max_value);
71 dump_printf (MSG_NOTE, "]\n");
73 return true;
75 else
77 if (dump_enabled_p ())
79 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 dump_printf (MSG_NOTE, " has no range info\n");
82 return false;
 86 /* Report that we've found an instance of the pattern named NAME in
87 statement STMT. */
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
92 if (dump_enabled_p ())
93 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97 return the pattern statement's stmt_vec_info. Set its vector type to
98 VECTYPE if it doesn't have one already. */
100 static stmt_vec_info
101 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
102 stmt_vec_info orig_stmt_info, tree vectype)
104 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
105 if (pattern_stmt_info == NULL)
106 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
107 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
109 pattern_stmt_info->pattern_stmt_p = true;
110 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
111 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
112 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
113 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
115 gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
116 == vect_use_mask_type_p (orig_stmt_info));
117 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
118 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
120 return pattern_stmt_info;
123 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
124 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
125 have one already. */
127 static void
128 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
129 stmt_vec_info orig_stmt_info, tree vectype)
131 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
132 STMT_VINFO_RELATED_STMT (orig_stmt_info)
133 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
136 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
137 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
138 be different from the vector type of the final pattern statement.
139 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
140 from which it was derived. */
142 static inline void
143 append_pattern_def_seq (vec_info *vinfo,
144 stmt_vec_info stmt_info, gimple *new_stmt,
145 tree vectype = NULL_TREE,
146 tree scalar_type_for_mask = NULL_TREE)
148 gcc_assert (!scalar_type_for_mask
149 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
150 if (vectype)
152 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
153 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
154 if (scalar_type_for_mask)
155 new_stmt_info->mask_precision
156 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
158 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
159 new_stmt);
162 /* The caller wants to perform new operations on vect_external variable
163 VAR, so that the result of the operations would also be vect_external.
164 Return the edge on which the operations can be performed, if one exists.
165 Return null if the operations should instead be treated as part of
166 the pattern that needs them. */
168 static edge
169 vect_get_external_def_edge (vec_info *vinfo, tree var)
171 edge e = NULL;
172 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
174 e = loop_preheader_edge (loop_vinfo->loop);
175 if (!SSA_NAME_IS_DEFAULT_DEF (var))
177 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
178 if (bb == NULL
179 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
180 e = NULL;
183 return e;
186 /* Return true if the target supports a vector version of CODE,
187 where CODE is known to map to a direct optab. ITYPE specifies
188 the type of (some of) the scalar inputs and OTYPE specifies the
189 type of the scalar result.
 191 If CODE allows the inputs and outputs to have different types
192 (such as for WIDEN_SUM_EXPR), it is the input mode rather
193 than the output mode that determines the appropriate target pattern.
194 Operand 0 of the target pattern then specifies the mode that the output
195 must have.
197 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
198 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
199 is nonnull. */
201 static bool
202 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
203 tree itype, tree *vecotype_out,
204 tree *vecitype_out = NULL)
206 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
207 if (!vecitype)
208 return false;
210 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
211 if (!vecotype)
212 return false;
214 optab optab = optab_for_tree_code (code, vecitype, optab_default);
215 if (!optab)
216 return false;
218 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
219 if (icode == CODE_FOR_nothing
220 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
221 return false;
223 *vecotype_out = vecotype;
224 if (vecitype_out)
225 *vecitype_out = vecitype;
226 return true;
229 /* Round bit precision PRECISION up to a full element. */
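/* For example (illustrative): a precision of 17 is rounded up to 32, and a
   precision of 3 is rounded up to BITS_PER_UNIT (normally 8).  */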
231 static unsigned int
232 vect_element_precision (unsigned int precision)
234 precision = 1 << ceil_log2 (precision);
235 return MAX (precision, BITS_PER_UNIT);
238 /* If OP is defined by a statement that's being considered for vectorization,
239 return information about that statement, otherwise return NULL. */
241 static stmt_vec_info
242 vect_get_internal_def (vec_info *vinfo, tree op)
244 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
245 if (def_stmt_info
246 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
247 return def_stmt_info;
248 return NULL;
251 /* Check whether NAME, an ssa-name used in STMT_VINFO,
252 is a result of a type promotion, such that:
253 DEF_STMT: NAME = NOP (name0)
254 If CHECK_SIGN is TRUE, check that either both types are signed or both are
255 unsigned. */
257 static bool
258 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
259 tree *orig_type, gimple **def_stmt, bool *promotion)
261 tree type = TREE_TYPE (name);
262 tree oprnd0;
263 enum vect_def_type dt;
265 stmt_vec_info def_stmt_info;
266 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
267 return false;
269 if (dt != vect_internal_def
270 && dt != vect_external_def && dt != vect_constant_def)
271 return false;
273 if (!*def_stmt)
274 return false;
276 if (!is_gimple_assign (*def_stmt))
277 return false;
279 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
280 return false;
282 oprnd0 = gimple_assign_rhs1 (*def_stmt);
284 *orig_type = TREE_TYPE (oprnd0);
285 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
286 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
287 return false;
289 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
290 *promotion = true;
291 else
292 *promotion = false;
294 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
295 return false;
297 return true;
300 /* Holds information about an input operand after some sign changes
301 and type promotions have been peeled away. */
302 class vect_unpromoted_value {
303 public:
304 vect_unpromoted_value ();
306 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
308 /* The value obtained after peeling away zero or more casts. */
309 tree op;
311 /* The type of OP. */
312 tree type;
314 /* The definition type of OP. */
315 vect_def_type dt;
317 /* If OP is the result of peeling at least one cast, and if the cast
318 of OP itself is a vectorizable statement, CASTER identifies that
319 statement, otherwise it is null. */
320 stmt_vec_info caster;
323 inline vect_unpromoted_value::vect_unpromoted_value ()
324 : op (NULL_TREE),
325 type (NULL_TREE),
326 dt (vect_uninitialized_def),
327 caster (NULL)
331 /* Set the operand to OP_IN, its definition type to DT_IN, and the
332 statement that casts it to CASTER_IN. */
334 inline void
335 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
336 stmt_vec_info caster_in)
338 op = op_in;
339 type = TREE_TYPE (op);
340 dt = dt_in;
341 caster = caster_in;
344 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
345 to reach some vectorizable inner operand OP', continuing as long as it
346 is possible to convert OP' back to OP using a possible sign change
347 followed by a possible promotion P. Return this OP', or null if OP is
348 not a vectorizable SSA name. If there is a promotion P, describe its
349 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
350 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
351 have more than one user.
353 A successful return means that it is possible to go from OP' to OP
354 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
355 whereas the cast from UNPROM to OP might be a promotion, a sign
356 change, or a nop.
358 E.g. say we have:
360 signed short *ptr = ...;
361 signed short C = *ptr;
362 unsigned short B = (unsigned short) C; // sign change
363 signed int A = (signed int) B; // unsigned promotion
364 ...possible other uses of A...
365 unsigned int OP = (unsigned int) A; // sign change
367 In this case it's possible to go directly from C to OP using:
369 OP = (unsigned int) (unsigned short) C;
370 +------------+ +--------------+
371 promotion sign change
373 so OP' would be C. The input to the promotion is B, so UNPROM
374 would describe B. */
376 static tree
377 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
378 vect_unpromoted_value *unprom,
379 bool *single_use_p = NULL)
381 tree res = NULL_TREE;
382 tree op_type = TREE_TYPE (op);
383 unsigned int orig_precision = TYPE_PRECISION (op_type);
384 unsigned int min_precision = orig_precision;
385 stmt_vec_info caster = NULL;
386 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
388 /* See whether OP is simple enough to vectorize. */
389 stmt_vec_info def_stmt_info;
390 gimple *def_stmt;
391 vect_def_type dt;
392 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
393 break;
395 /* If OP is the input of a demotion, skip over it to see whether
396 OP is itself the result of a promotion. If so, the combined
397 effect of the promotion and the demotion might fit the required
398 pattern, otherwise neither operation fits.
400 This copes with cases such as the result of an arithmetic
401 operation being truncated before being stored, and where that
402 arithmetic operation has been recognized as an over-widened one. */
403 if (TYPE_PRECISION (op_type) <= min_precision)
405 /* Use OP as the UNPROM described above if we haven't yet
406 found a promotion, or if using the new input preserves the
407 sign of the previous promotion. */
408 if (!res
409 || TYPE_PRECISION (unprom->type) == orig_precision
410 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
412 unprom->set_op (op, dt, caster);
413 min_precision = TYPE_PRECISION (op_type);
415 /* Stop if we've already seen a promotion and if this
416 conversion does more than change the sign. */
417 else if (TYPE_PRECISION (op_type)
418 != TYPE_PRECISION (unprom->type))
419 break;
421 /* The sequence now extends to OP. */
422 res = op;
425 /* See whether OP is defined by a cast. Record it as CASTER if
426 the cast is potentially vectorizable. */
427 if (!def_stmt)
428 break;
429 caster = def_stmt_info;
431 /* Ignore pattern statements, since we don't link uses for them. */
432 if (caster
433 && single_use_p
434 && !STMT_VINFO_RELATED_STMT (caster)
435 && !has_single_use (res))
436 *single_use_p = false;
438 gassign *assign = dyn_cast <gassign *> (def_stmt);
439 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
440 break;
442 /* Continue with the input to the cast. */
443 op = gimple_assign_rhs1 (def_stmt);
444 op_type = TREE_TYPE (op);
446 return res;
449 /* OP is an integer operand to an operation that returns TYPE, and we
450 want to treat the operation as a widening one. So far we can treat
451 it as widening from *COMMON_TYPE.
453 Return true if OP is suitable for such a widening operation,
454 either widening from *COMMON_TYPE or from some supertype of it.
455 Update *COMMON_TYPE to the supertype in the latter case.
457 SHIFT_P is true if OP is a shift amount. */
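/* An illustrative example: if TYPE is a 32-bit integer, *COMMON_TYPE is an
   8-bit unsigned type and OP is the constant 1000, then OP needs 10 bits,
   which rounds up to a 16-bit element.  *COMMON_TYPE is widened to a 16-bit
   unsigned type and the function returns true, since that is still narrower
   than the 32-bit result.  */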
459 static bool
460 vect_joust_widened_integer (tree type, bool shift_p, tree op,
461 tree *common_type)
463 /* Calculate the minimum precision required by OP, without changing
464 the sign of either operand. */
465 unsigned int precision;
466 if (shift_p)
468 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
469 return false;
470 precision = TREE_INT_CST_LOW (op);
472 else
474 precision = wi::min_precision (wi::to_widest (op),
475 TYPE_SIGN (*common_type));
476 if (precision * 2 > TYPE_PRECISION (type))
477 return false;
480 /* If OP requires a wider type, switch to that type. The checks
481 above ensure that this is still narrower than the result. */
482 precision = vect_element_precision (precision);
483 if (TYPE_PRECISION (*common_type) < precision)
484 *common_type = build_nonstandard_integer_type
485 (precision, TYPE_UNSIGNED (*common_type));
486 return true;
489 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
490 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
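/* An illustrative example: joining an 8-bit signed *COMMON_TYPE with an
   8-bit unsigned NEW_TYPE requires a 16-bit signed supertype, which is
   accepted as long as TYPE is at least 32 bits wide.  */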
492 static bool
493 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
495 if (types_compatible_p (*common_type, new_type))
496 return true;
498 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
499 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
500 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
501 return true;
503 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
504 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
505 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
507 *common_type = new_type;
508 return true;
511 /* We have mismatched signs, with the signed type being
512 no wider than the unsigned type. In this case we need
513 a wider signed type. */
514 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
515 TYPE_PRECISION (new_type));
516 precision *= 2;
517 if (precision * 2 > TYPE_PRECISION (type))
518 return false;
520 *common_type = build_nonstandard_integer_type (precision, false);
521 return true;
524 /* Check whether STMT_INFO can be viewed as a tree of integer operations
525 in which each node either performs CODE or WIDENED_CODE, and where
526 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
527 specifies the maximum number of leaf operands. SHIFT_P says whether
528 CODE and WIDENED_CODE are some sort of shift.
530 If STMT_INFO is such a tree, return the number of leaf operands
531 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
532 to a type that (a) is narrower than the result of STMT_INFO and
533 (b) can hold all leaf operand values.
535 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
536 exists. */
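/* A rough example: with CODE == MULT_EXPR, WIDENED_CODE == WIDEN_MULT_EXPR
   and MAX_NOPS == 2, a statement such as

     prod = (int) a * (int) b;   // a and b at most half the width of int

   is such a tree with leaf operands A and B, and *COMMON_TYPE is set to a
   type that can hold both of their value ranges.  */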
538 static unsigned int
539 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
540 tree_code widened_code, bool shift_p,
541 unsigned int max_nops,
542 vect_unpromoted_value *unprom, tree *common_type)
544 /* Check for an integer operation with the right code. */
545 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
546 if (!assign)
547 return 0;
549 tree_code rhs_code = gimple_assign_rhs_code (assign);
550 if (rhs_code != code && rhs_code != widened_code)
551 return 0;
553 tree type = gimple_expr_type (assign);
554 if (!INTEGRAL_TYPE_P (type))
555 return 0;
557 /* Assume that both operands will be leaf operands. */
558 max_nops -= 2;
560 /* Check the operands. */
561 unsigned int next_op = 0;
562 for (unsigned int i = 0; i < 2; ++i)
564 vect_unpromoted_value *this_unprom = &unprom[next_op];
565 unsigned int nops = 1;
566 tree op = gimple_op (assign, i + 1);
567 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
569 /* We already have a common type from earlier operands.
570 Update it to account for OP. */
571 this_unprom->set_op (op, vect_constant_def);
572 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
573 return 0;
575 else
577 /* Only allow shifts by constants. */
578 if (shift_p && i == 1)
579 return 0;
581 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
582 return 0;
584 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
586 /* The operand isn't widened. If STMT_INFO has the code
587 for an unwidened operation, recursively check whether
588 this operand is a node of the tree. */
589 if (rhs_code != code
590 || max_nops == 0
591 || this_unprom->dt != vect_internal_def)
592 return 0;
594 /* Give back the leaf slot allocated above now that we're
595 not treating this as a leaf operand. */
596 max_nops += 1;
598 /* Recursively process the definition of the operand. */
599 stmt_vec_info def_stmt_info
600 = vinfo->lookup_def (this_unprom->op);
601 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
602 widened_code, shift_p, max_nops,
603 this_unprom, common_type);
604 if (nops == 0)
605 return 0;
607 max_nops -= nops;
609 else
611 /* Make sure that the operand is narrower than the result. */
612 if (TYPE_PRECISION (this_unprom->type) * 2
613 > TYPE_PRECISION (type))
614 return 0;
616 /* Update COMMON_TYPE for the new operand. */
617 if (i == 0)
618 *common_type = this_unprom->type;
619 else if (!vect_joust_widened_type (type, this_unprom->type,
620 common_type))
621 return 0;
624 next_op += nops;
626 return next_op;
 629 /* Helper to return a new pattern temporary of TYPE, defined by STMT. If STMT
630 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
632 static tree
633 vect_recog_temp_ssa_var (tree type, gimple *stmt)
635 return make_temp_ssa_name (type, stmt, "patt");
638 /* STMT2_INFO describes a type conversion that could be split into STMT1
639 followed by a version of STMT2_INFO that takes NEW_RHS as its first
640 input. Try to do this using pattern statements, returning true on
641 success. */
643 static bool
644 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
645 gimple *stmt1, tree vectype)
647 if (is_pattern_stmt_p (stmt2_info))
649 /* STMT2_INFO is part of a pattern. Get the statement to which
650 the pattern is attached. */
651 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
652 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE, vect_location,
656 "Splitting pattern statement: %G", stmt2_info->stmt);
658 /* Since STMT2_INFO is a pattern statement, we can change it
659 in-situ without worrying about changing the code for the
660 containing block. */
661 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
666 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
667 stmt2_info->stmt);
670 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
671 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
672 /* STMT2_INFO is the actual pattern statement. Add STMT1
673 to the end of the definition sequence. */
674 gimple_seq_add_stmt_without_update (def_seq, stmt1);
675 else
677 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
678 before it. */
679 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
680 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
682 return true;
684 else
686 /* STMT2_INFO doesn't yet have a pattern. Try to create a
687 two-statement pattern now. */
688 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
689 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
690 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
691 if (!lhs_vectype)
692 return false;
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_NOTE, vect_location,
696 "Splitting statement: %G", stmt2_info->stmt);
698 /* Add STMT1 as a singleton pattern definition sequence. */
699 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
700 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
701 gimple_seq_add_stmt_without_update (def_seq, stmt1);
703 /* Build the second of the two pattern statements. */
704 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
705 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
706 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
708 if (dump_enabled_p ())
710 dump_printf_loc (MSG_NOTE, vect_location,
711 "into pattern statements: %G", stmt1);
712 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
715 return true;
719 /* Convert UNPROM to TYPE and return the result, adding new statements
720 to STMT_INFO's pattern definition statements if no better way is
721 available. VECTYPE is the vector form of TYPE. */
723 static tree
724 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
725 vect_unpromoted_value *unprom, tree vectype)
727 /* Check for a no-op conversion. */
728 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
729 return unprom->op;
731 /* Allow the caller to create constant vect_unpromoted_values. */
732 if (TREE_CODE (unprom->op) == INTEGER_CST)
733 return wide_int_to_tree (type, wi::to_widest (unprom->op));
735 tree input = unprom->op;
736 if (unprom->caster)
738 tree lhs = gimple_get_lhs (unprom->caster->stmt);
739 tree lhs_type = TREE_TYPE (lhs);
741 /* If the result of the existing cast is the right width, use it
742 instead of the source of the cast. */
743 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
744 input = lhs;
745 /* If the precision we want is between the source and result
746 precisions of the existing cast, try splitting the cast into
747 two and tapping into a mid-way point. */
748 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
749 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
751 /* In order to preserve the semantics of the original cast,
752 give the mid-way point the same signedness as the input value.
754 It would be possible to use a signed type here instead if
755 TYPE is signed and UNPROM->TYPE is unsigned, but that would
756 make the sign of the midtype sensitive to the order in
757 which we process the statements, since the signedness of
758 TYPE is the signedness required by just one of possibly
759 many users. Also, unsigned promotions are usually as cheap
760 as or cheaper than signed ones, so it's better to keep an
761 unsigned promotion. */
762 tree midtype = build_nonstandard_integer_type
763 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
764 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
765 if (vec_midtype)
767 input = vect_recog_temp_ssa_var (midtype, NULL);
768 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
769 unprom->op);
770 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
771 vec_midtype))
772 append_pattern_def_seq (vinfo, stmt_info,
773 new_stmt, vec_midtype);
777 /* See if we can reuse an existing result. */
778 if (types_compatible_p (type, TREE_TYPE (input)))
779 return input;
782 /* We need a new conversion statement. */
783 tree new_op = vect_recog_temp_ssa_var (type, NULL);
784 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
786 /* If OP is an external value, see if we can insert the new statement
787 on an incoming edge. */
788 if (input == unprom->op && unprom->dt == vect_external_def)
789 if (edge e = vect_get_external_def_edge (vinfo, input))
791 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
792 gcc_assert (!new_bb);
793 return new_op;
796 /* As a (common) last resort, add the statement to the pattern itself. */
797 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
798 return new_op;
801 /* Invoke vect_convert_input for N elements of UNPROM and store the
802 result in the corresponding elements of RESULT. */
804 static void
805 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
806 tree *result, tree type, vect_unpromoted_value *unprom,
807 tree vectype)
809 for (unsigned int i = 0; i < n; ++i)
811 unsigned int j;
812 for (j = 0; j < i; ++j)
813 if (unprom[j].op == unprom[i].op)
814 break;
815 if (j < i)
816 result[i] = result[j];
817 else
818 result[i] = vect_convert_input (vinfo, stmt_info,
819 type, &unprom[i], vectype);
823 /* The caller has created a (possibly empty) sequence of pattern definition
824 statements followed by a single statement PATTERN_STMT. Cast the result
825 of this final statement to TYPE. If a new statement is needed, add
826 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
827 and return the new statement, otherwise return PATTERN_STMT as-is.
828 VECITYPE is the vector form of PATTERN_STMT's result type. */
830 static gimple *
831 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
832 gimple *pattern_stmt, tree vecitype)
834 tree lhs = gimple_get_lhs (pattern_stmt);
835 if (!types_compatible_p (type, TREE_TYPE (lhs)))
837 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
838 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
839 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
841 return pattern_stmt;
844 /* Return true if STMT_VINFO describes a reduction for which reassociation
845 is allowed. If STMT_INFO is part of a group, assume that it's part of
846 a reduction chain and optimistically assume that all statements
847 except the last allow reassociation.
848 Also require it to have code CODE and to be a reduction
849 in the outermost loop. When returning true, store the operands in
850 *OP0_OUT and *OP1_OUT. */
852 static bool
853 vect_reassociating_reduction_p (vec_info *vinfo,
854 stmt_vec_info stmt_info, tree_code code,
855 tree *op0_out, tree *op1_out)
857 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
858 if (!loop_info)
859 return false;
861 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
862 if (!assign || gimple_assign_rhs_code (assign) != code)
863 return false;
865 /* We don't allow changing the order of the computation in the inner-loop
866 when doing outer-loop vectorization. */
867 class loop *loop = LOOP_VINFO_LOOP (loop_info);
868 if (loop && nested_in_vect_loop_p (loop, stmt_info))
869 return false;
871 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
873 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
874 code))
875 return false;
877 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
878 return false;
880 *op0_out = gimple_assign_rhs1 (assign);
881 *op1_out = gimple_assign_rhs2 (assign);
882 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
883 std::swap (*op0_out, *op1_out);
884 return true;
887 /* Function vect_recog_dot_prod_pattern
889 Try to find the following pattern:
891 type x_t, y_t;
892 TYPE1 prod;
893 TYPE2 sum = init;
894 loop:
895 sum_0 = phi <init, sum_1>
896 S1 x_t = ...
897 S2 y_t = ...
898 S3 x_T = (TYPE1) x_t;
899 S4 y_T = (TYPE1) y_t;
900 S5 prod = x_T * y_T;
901 [S6 prod = (TYPE2) prod; #optional]
902 S7 sum_1 = prod + sum_0;
904 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
 905 same size as 'TYPE1' or bigger. This is a special case of a reduction
906 computation.
908 Input:
910 * STMT_VINFO: The stmt from which the pattern search begins. In the
911 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
912 will be detected.
914 Output:
916 * TYPE_OUT: The type of the output of this pattern.
918 * Return value: A new stmt that will be used to replace the sequence of
919 stmts that constitute the pattern. In this case it will be:
920 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
922 Note: The dot-prod idiom is a widening reduction pattern that is
923 vectorized without preserving all the intermediate results. It
924 produces only N/2 (widened) results (by summing up pairs of
925 intermediate results) rather than all N results. Therefore, we
926 cannot allow this pattern when we want to get all the results and in
927 the correct order (as is the case when this computation is in an
 928 inner-loop nested in an outer-loop that is being vectorized). */
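/* As a concrete (illustrative) example, a loop such as

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < n; ++i)
       sum += a[i] * b[i];   // a[i] and b[i] are promoted to int

   matches this pattern with type == short and TYPE1 == TYPE2 == int, and is
   replaced by DOT_PROD_EXPR <a[i], b[i], sum_0> when the target provides a
   suitable dot-product optab.  */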
930 static gimple *
931 vect_recog_dot_prod_pattern (vec_info *vinfo,
932 stmt_vec_info stmt_vinfo, tree *type_out)
934 tree oprnd0, oprnd1;
935 gimple *last_stmt = stmt_vinfo->stmt;
936 tree type, half_type;
937 gimple *pattern_stmt;
938 tree var;
940 /* Look for the following pattern
941 DX = (TYPE1) X;
942 DY = (TYPE1) Y;
943 DPROD = DX * DY;
944 DDPROD = (TYPE2) DPROD;
945 sum_1 = DDPROD + sum_0;
946 In which
947 - DX is double the size of X
948 - DY is double the size of Y
949 - DX, DY, DPROD all have the same type
 950 - sum is the same size as DPROD or bigger
951 - sum has been recognized as a reduction variable.
953 This is equivalent to:
954 DPROD = X w* Y; #widen mult
955 sum_1 = DPROD w+ sum_0; #widen summation
957 DPROD = X w* Y; #widen mult
958 sum_1 = DPROD + sum_0; #summation
961 /* Starting from LAST_STMT, follow the defs of its uses in search
962 of the above pattern. */
964 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
965 &oprnd0, &oprnd1))
966 return NULL;
968 type = gimple_expr_type (last_stmt);
970 vect_unpromoted_value unprom_mult;
971 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
973 /* So far so good. Since last_stmt was detected as a (summation) reduction,
974 we know that oprnd1 is the reduction variable (defined by a loop-header
975 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
 976 It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
977 if (!oprnd0)
978 return NULL;
980 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
981 if (!mult_vinfo)
982 return NULL;
 984 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
985 inside the loop (in case we are analyzing an outer-loop). */
986 vect_unpromoted_value unprom0[2];
987 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
988 false, 2, unprom0, &half_type))
989 return NULL;
991 /* If there are two widening operations, make sure they agree on
992 the sign of the extension. */
993 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
994 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
995 return NULL;
997 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
999 tree half_vectype;
1000 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1001 type_out, &half_vectype))
1002 return NULL;
1004 /* Get the inputs in the appropriate types. */
1005 tree mult_oprnd[2];
1006 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1007 unprom0, half_vectype);
1009 var = vect_recog_temp_ssa_var (type, NULL);
1010 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1011 mult_oprnd[0], mult_oprnd[1], oprnd1);
1013 return pattern_stmt;
1017 /* Function vect_recog_sad_pattern
1019 Try to find the following Sum of Absolute Difference (SAD) pattern:
1021 type x_t, y_t;
1022 signed TYPE1 diff, abs_diff;
1023 TYPE2 sum = init;
1024 loop:
1025 sum_0 = phi <init, sum_1>
1026 S1 x_t = ...
1027 S2 y_t = ...
1028 S3 x_T = (TYPE1) x_t;
1029 S4 y_T = (TYPE1) y_t;
1030 S5 diff = x_T - y_T;
1031 S6 abs_diff = ABS_EXPR <diff>;
1032 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1033 S8 sum_1 = abs_diff + sum_0;
1035 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
 1036 same size as 'TYPE1' or bigger. This is a special case of a reduction
1037 computation.
1039 Input:
1041 * STMT_VINFO: The stmt from which the pattern search begins. In the
1042 example, when this function is called with S8, the pattern
1043 {S3,S4,S5,S6,S7,S8} will be detected.
1045 Output:
1047 * TYPE_OUT: The type of the output of this pattern.
1049 * Return value: A new stmt that will be used to replace the sequence of
1050 stmts that constitute the pattern. In this case it will be:
 1051 SAD_EXPR <x_t, y_t, sum_0>  */
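/* As a concrete (illustrative) example, a loop such as

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < n; ++i)
       sum += __builtin_abs (a[i] - b[i]);   // operands promoted to int

   matches this pattern with type == unsigned char and TYPE2 == int, and is
   replaced by SAD_EXPR <a[i], b[i], sum_0> when the target provides a
   suitable SAD optab (e.g. a PSADBW-style instruction).  */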
1054 static gimple *
1055 vect_recog_sad_pattern (vec_info *vinfo,
1056 stmt_vec_info stmt_vinfo, tree *type_out)
1058 gimple *last_stmt = stmt_vinfo->stmt;
1059 tree half_type;
1061 /* Look for the following pattern
1062 DX = (TYPE1) X;
1063 DY = (TYPE1) Y;
1064 DDIFF = DX - DY;
1065 DAD = ABS_EXPR <DDIFF>;
 1066 DAD = (TYPE2) DAD; #optional
1067 sum_1 = DAD + sum_0;
1068 In which
1069 - DX is at least double the size of X
1070 - DY is at least double the size of Y
1071 - DX, DY, DDIFF, DAD all have the same type
 1072 - sum is the same size as DAD or bigger
1073 - sum has been recognized as a reduction variable.
1075 This is equivalent to:
1076 DDIFF = X w- Y; #widen sub
1077 DAD = ABS_EXPR <DDIFF>;
1078 sum_1 = DAD w+ sum_0; #widen summation
1080 DDIFF = X w- Y; #widen sub
1081 DAD = ABS_EXPR <DDIFF>;
1082 sum_1 = DAD + sum_0; #summation
1085 /* Starting from LAST_STMT, follow the defs of its uses in search
1086 of the above pattern. */
1088 tree plus_oprnd0, plus_oprnd1;
1089 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1090 &plus_oprnd0, &plus_oprnd1))
1091 return NULL;
1093 tree sum_type = gimple_expr_type (last_stmt);
1095 /* Any non-truncating sequence of conversions is OK here, since
1096 with a successful match, the result of the ABS(U) is known to fit
1097 within the nonnegative range of the result type. (It cannot be the
1098 negative of the minimum signed value due to the range of the widening
1099 MINUS_EXPR.) */
1100 vect_unpromoted_value unprom_abs;
1101 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1102 &unprom_abs);
1104 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1105 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1106 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1107 Then check that plus_oprnd0 is defined by an abs_expr. */
1109 if (!plus_oprnd0)
1110 return NULL;
1112 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1113 if (!abs_stmt_vinfo)
1114 return NULL;
 1116 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1117 inside the loop (in case we are analyzing an outer-loop). */
1118 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1119 if (!abs_stmt
1120 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1121 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1122 return NULL;
1124 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1125 tree abs_type = TREE_TYPE (abs_oprnd);
1126 if (TYPE_UNSIGNED (abs_type))
1127 return NULL;
1129 /* Peel off conversions from the ABS input. This can involve sign
1130 changes (e.g. from an unsigned subtraction to a signed ABS input)
1131 or signed promotion, but it can't include unsigned promotion.
1132 (Note that ABS of an unsigned promotion should have been folded
1133 away before now anyway.) */
1134 vect_unpromoted_value unprom_diff;
1135 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1136 &unprom_diff);
1137 if (!abs_oprnd)
1138 return NULL;
1139 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1140 && TYPE_UNSIGNED (unprom_diff.type))
1141 return NULL;
1143 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1144 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1145 if (!diff_stmt_vinfo)
1146 return NULL;
 1148 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1149 inside the loop (in case we are analyzing an outer-loop). */
1150 vect_unpromoted_value unprom[2];
1151 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1152 false, 2, unprom, &half_type))
1153 return NULL;
1155 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1157 tree half_vectype;
1158 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1159 type_out, &half_vectype))
1160 return NULL;
1162 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1163 tree sad_oprnd[2];
1164 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1165 unprom, half_vectype);
1167 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1168 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1169 sad_oprnd[1], plus_oprnd1);
1171 return pattern_stmt;
1174 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1175 so that it can be treated as though it had the form:
1177 A_TYPE a;
1178 B_TYPE b;
1179 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1180 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1181 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1182 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1183 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1185 Try to replace the pattern with:
1187 A_TYPE a;
1188 B_TYPE b;
1189 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1190 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1191 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1192 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1194 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1196 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1197 name of the pattern being matched, for dump purposes. */
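/* An illustrative example, with ORIG_CODE == MULT_EXPR and
   WIDE_CODE == WIDEN_MULT_EXPR:

     short a, b;
     int res = (int) a * (int) b;

   becomes a single WIDEN_MULT_EXPR on a and b, with the result converted
   to RES_TYPE only if that conversion is not a no-op.  */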
1199 static gimple *
1200 vect_recog_widen_op_pattern (vec_info *vinfo,
1201 stmt_vec_info last_stmt_info, tree *type_out,
1202 tree_code orig_code, tree_code wide_code,
1203 bool shift_p, const char *name)
1205 gimple *last_stmt = last_stmt_info->stmt;
1207 vect_unpromoted_value unprom[2];
1208 tree half_type;
1209 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1210 shift_p, 2, unprom, &half_type))
1211 return NULL;
1213 /* Pattern detected. */
1214 vect_pattern_detected (name, last_stmt);
1216 tree type = gimple_expr_type (last_stmt);
1217 tree itype = type;
1218 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1219 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1220 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1221 TYPE_UNSIGNED (half_type));
1223 /* Check target support */
1224 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1225 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1226 enum tree_code dummy_code;
1227 int dummy_int;
1228 auto_vec<tree> dummy_vec;
1229 if (!vectype
1230 || !vecitype
1231 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1232 vecitype, vectype,
1233 &dummy_code, &dummy_code,
1234 &dummy_int, &dummy_vec))
1235 return NULL;
1237 *type_out = get_vectype_for_scalar_type (vinfo, type);
1238 if (!*type_out)
1239 return NULL;
1241 tree oprnd[2];
1242 vect_convert_inputs (vinfo, last_stmt_info,
1243 2, oprnd, half_type, unprom, vectype);
1245 tree var = vect_recog_temp_ssa_var (itype, NULL);
1246 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1247 oprnd[0], oprnd[1]);
1249 return vect_convert_output (vinfo, last_stmt_info,
1250 type, pattern_stmt, vecitype);
1253 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1254 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1256 static gimple *
1257 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1258 tree *type_out)
1260 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1261 MULT_EXPR, WIDEN_MULT_EXPR, false,
1262 "vect_recog_widen_mult_pattern");
1265 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1266 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1268 static gimple *
1269 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1270 tree *type_out)
1272 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1273 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1274 "vect_recog_widen_plus_pattern");
1277 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1278 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1279 static gimple *
1280 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1281 tree *type_out)
1283 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1284 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1285 "vect_recog_widen_minus_pattern");
1288 /* Function vect_recog_pow_pattern
1290 Try to find the following pattern:
1292 x = POW (y, N);
1294 with POW being one of pow, powf, powi, powif and N being
1295 either 2 or 0.5.
1297 Input:
1299 * STMT_VINFO: The stmt from which the pattern search begins.
1301 Output:
1303 * TYPE_OUT: The type of the output of this pattern.
1305 * Return value: A new stmt that will be used to replace the sequence of
1306 stmts that constitute the pattern. In this case it will be:
 1307 x = x * x
 or
 1309 x = sqrt (x)  */
1312 static gimple *
1313 vect_recog_pow_pattern (vec_info *vinfo,
1314 stmt_vec_info stmt_vinfo, tree *type_out)
1316 gimple *last_stmt = stmt_vinfo->stmt;
1317 tree base, exp;
1318 gimple *stmt;
1319 tree var;
1321 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1322 return NULL;
1324 switch (gimple_call_combined_fn (last_stmt))
1326 CASE_CFN_POW:
1327 CASE_CFN_POWI:
1328 break;
1330 default:
1331 return NULL;
1334 base = gimple_call_arg (last_stmt, 0);
1335 exp = gimple_call_arg (last_stmt, 1);
1336 if (TREE_CODE (exp) != REAL_CST
1337 && TREE_CODE (exp) != INTEGER_CST)
1339 if (flag_unsafe_math_optimizations
1340 && TREE_CODE (base) == REAL_CST
1341 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1343 combined_fn log_cfn;
1344 built_in_function exp_bfn;
1345 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1347 case BUILT_IN_POW:
1348 log_cfn = CFN_BUILT_IN_LOG;
1349 exp_bfn = BUILT_IN_EXP;
1350 break;
1351 case BUILT_IN_POWF:
1352 log_cfn = CFN_BUILT_IN_LOGF;
1353 exp_bfn = BUILT_IN_EXPF;
1354 break;
1355 case BUILT_IN_POWL:
1356 log_cfn = CFN_BUILT_IN_LOGL;
1357 exp_bfn = BUILT_IN_EXPL;
1358 break;
1359 default:
1360 return NULL;
1362 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1363 tree exp_decl = builtin_decl_implicit (exp_bfn);
1364 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1365 does that, but if C is a power of 2, we want to use
1366 exp2 (log2 (C) * x) in the non-vectorized version, but for
1367 vectorization we don't have vectorized exp2. */
1368 if (logc
1369 && TREE_CODE (logc) == REAL_CST
1370 && exp_decl
1371 && lookup_attribute ("omp declare simd",
1372 DECL_ATTRIBUTES (exp_decl)))
1374 cgraph_node *node = cgraph_node::get_create (exp_decl);
1375 if (node->simd_clones == NULL)
1377 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1378 || node->definition)
1379 return NULL;
1380 expand_simd_clones (node);
1381 if (node->simd_clones == NULL)
1382 return NULL;
1384 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1385 if (!*type_out)
1386 return NULL;
1387 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1388 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1389 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1390 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1391 g = gimple_build_call (exp_decl, 1, def);
1392 gimple_call_set_lhs (g, res);
1393 return g;
1397 return NULL;
1400 /* We now have a pow or powi builtin function call with a constant
1401 exponent. */
1403 /* Catch squaring. */
1404 if ((tree_fits_shwi_p (exp)
1405 && tree_to_shwi (exp) == 2)
1406 || (TREE_CODE (exp) == REAL_CST
1407 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1409 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1410 TREE_TYPE (base), type_out))
1411 return NULL;
1413 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1414 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1415 return stmt;
1418 /* Catch square root. */
1419 if (TREE_CODE (exp) == REAL_CST
1420 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1422 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1423 if (*type_out
1424 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1425 OPTIMIZE_FOR_SPEED))
1427 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1428 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1429 gimple_call_set_lhs (stmt, var);
1430 gimple_call_set_nothrow (stmt, true);
1431 return stmt;
1435 return NULL;
1439 /* Function vect_recog_widen_sum_pattern
1441 Try to find the following pattern:
1443 type x_t;
1444 TYPE x_T, sum = init;
1445 loop:
1446 sum_0 = phi <init, sum_1>
1447 S1 x_t = *p;
1448 S2 x_T = (TYPE) x_t;
1449 S3 sum_1 = x_T + sum_0;
1451 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1452 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1453 a special case of a reduction computation.
1455 Input:
1457 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1458 when this function is called with S3, the pattern {S2,S3} will be detected.
1460 Output:
1462 * TYPE_OUT: The type of the output of this pattern.
1464 * Return value: A new stmt that will be used to replace the sequence of
1465 stmts that constitute the pattern. In this case it will be:
1466 WIDEN_SUM <x_t, sum_0>
1468 Note: The widening-sum idiom is a widening reduction pattern that is
1469 vectorized without preserving all the intermediate results. It
1470 produces only N/2 (widened) results (by summing up pairs of
1471 intermediate results) rather than all N results. Therefore, we
1472 cannot allow this pattern when we want to get all the results and in
1473 the correct order (as is the case when this computation is in an
 1474 inner-loop nested in an outer-loop that is being vectorized). */
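/* As a concrete (illustrative) example, a loop such as

     short a[N];
     int sum = 0;
     for (int i = 0; i < n; ++i)
       sum += a[i];   // a[i] is promoted to int

   matches this pattern and is replaced by WIDEN_SUM_EXPR <a[i], sum_0>
   when the target provides a widening-sum optab.  */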
1476 static gimple *
1477 vect_recog_widen_sum_pattern (vec_info *vinfo,
1478 stmt_vec_info stmt_vinfo, tree *type_out)
1480 gimple *last_stmt = stmt_vinfo->stmt;
1481 tree oprnd0, oprnd1;
1482 tree type;
1483 gimple *pattern_stmt;
1484 tree var;
1486 /* Look for the following pattern
1487 DX = (TYPE) X;
1488 sum_1 = DX + sum_0;
1489 In which DX is at least double the size of X, and sum_1 has been
1490 recognized as a reduction variable.
1493 /* Starting from LAST_STMT, follow the defs of its uses in search
1494 of the above pattern. */
1496 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1497 &oprnd0, &oprnd1))
1498 return NULL;
1500 type = gimple_expr_type (last_stmt);
1502 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1503 we know that oprnd1 is the reduction variable (defined by a loop-header
1504 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
 1505 It remains to check that oprnd0 is defined by a cast from type 'type' to type
1506 'TYPE'. */
1508 vect_unpromoted_value unprom0;
1509 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1510 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1511 return NULL;
1513 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1515 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1516 unprom0.type, type_out))
1517 return NULL;
1519 var = vect_recog_temp_ssa_var (type, NULL);
1520 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1522 return pattern_stmt;
1525 /* Recognize cases in which an operation is performed in one type WTYPE
1526 but could be done more efficiently in a narrower type NTYPE. For example,
1527 if we have:
1529 ATYPE a; // narrower than NTYPE
1530 BTYPE b; // narrower than NTYPE
1531 WTYPE aw = (WTYPE) a;
1532 WTYPE bw = (WTYPE) b;
1533 WTYPE res = aw + bw; // only uses of aw and bw
1535 then it would be more efficient to do:
1537 NTYPE an = (NTYPE) a;
1538 NTYPE bn = (NTYPE) b;
1539 NTYPE resn = an + bn;
1540 WTYPE res = (WTYPE) resn;
1542 Other situations include things like:
1544 ATYPE a; // NTYPE or narrower
1545 WTYPE aw = (WTYPE) a;
1546 WTYPE res = aw + b;
1548 when only "(NTYPE) res" is significant. In that case it's more efficient
1549 to truncate "b" and do the operation on NTYPE instead:
1551 NTYPE an = (NTYPE) a;
1552 NTYPE bn = (NTYPE) b; // truncation
1553 NTYPE resn = an + bn;
1554 WTYPE res = (WTYPE) resn;
1556 All users of "res" should then use "resn" instead, making the final
1557 statement dead (not marked as relevant). The final statement is still
1558 needed to maintain the type correctness of the IR.
1560 vect_determine_precisions has already determined the minimum
 1561 precision of the operation and the minimum precision required
1562 by users of the result. */
1564 static gimple *
1565 vect_recog_over_widening_pattern (vec_info *vinfo,
1566 stmt_vec_info last_stmt_info, tree *type_out)
1568 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1569 if (!last_stmt)
1570 return NULL;
1572 /* See whether we have found that this operation can be done on a
1573 narrower type without changing its semantics. */
1574 unsigned int new_precision = last_stmt_info->operation_precision;
1575 if (!new_precision)
1576 return NULL;
1578 tree lhs = gimple_assign_lhs (last_stmt);
1579 tree type = TREE_TYPE (lhs);
1580 tree_code code = gimple_assign_rhs_code (last_stmt);
1582 /* Keep the first operand of a COND_EXPR as-is: only the other two
1583 operands are interesting. */
1584 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1586 /* Check the operands. */
1587 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1588 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1589 unprom.quick_grow (nops);
1590 unsigned int min_precision = 0;
1591 bool single_use_p = false;
1592 for (unsigned int i = 0; i < nops; ++i)
1594 tree op = gimple_op (last_stmt, first_op + i);
1595 if (TREE_CODE (op) == INTEGER_CST)
1596 unprom[i].set_op (op, vect_constant_def);
1597 else if (TREE_CODE (op) == SSA_NAME)
1599 bool op_single_use_p = true;
1600 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1601 &op_single_use_p))
1602 return NULL;
1603 /* If:
1605 (1) N bits of the result are needed;
1606 (2) all inputs are widened from M<N bits; and
1607 (3) one operand OP is a single-use SSA name
1609 we can shift the M->N widening from OP to the output
1610 without changing the number or type of extensions involved.
1611 This then reduces the number of copies of STMT_INFO.
1613 If instead of (3) more than one operand is a single-use SSA name,
1614 shifting the extension to the output is even more of a win.
1616 If instead:
1618 (1) N bits of the result are needed;
1619 (2) one operand OP2 is widened from M2<N bits;
1620 (3) another operand OP1 is widened from M1<M2 bits; and
1621 (4) both OP1 and OP2 are single-use
1623 the choice is between:
1625 (a) truncating OP2 to M1, doing the operation on M1,
1626 and then widening the result to N
1628 (b) widening OP1 to M2, doing the operation on M2, and then
1629 widening the result to N
1631 Both shift the M2->N widening of the inputs to the output.
1632 (a) additionally shifts the M1->M2 widening to the output;
1633 it requires fewer copies of STMT_INFO but requires an extra
1634 M2->M1 truncation.
1636 Which is better will depend on the complexity and cost of
1637 STMT_INFO, which is hard to predict at this stage. However,
1638 a clear tie-breaker in favor of (b) is the fact that the
1639 truncation in (a) increases the length of the operation chain.
1641 If instead of (4) only one of OP1 or OP2 is single-use,
1642 (b) is still a win over doing the operation in N bits:
1643 it still shifts the M2->N widening on the single-use operand
1644 to the output and reduces the number of STMT_INFO copies.
1646 If neither operand is single-use then operating on fewer than
1647 N bits might lead to more extensions overall. Whether it does
1648 or not depends on global information about the vectorization
1649 region, and whether that's a good trade-off would again
1650 depend on the complexity and cost of the statements involved,
1651 as well as things like register pressure that are not normally
1652 modelled at this stage. We therefore ignore these cases
1653 and just optimize the clear single-use wins above.
1655 Thus we take the maximum precision of the unpromoted operands
1656 and record whether any operand is single-use. */
1657 if (unprom[i].dt == vect_internal_def)
1659 min_precision = MAX (min_precision,
1660 TYPE_PRECISION (unprom[i].type));
1661 single_use_p |= op_single_use_p;
1664 else
1665 return NULL;
1668 /* Although the operation could be done in operation_precision, we have
1669 to balance that against introducing extra truncations or extensions.
1670 Calculate the minimum precision that can be handled efficiently.
1672 The loop above determined that the operation could be handled
1673 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1674 extension from the inputs to the output without introducing more
1675 instructions, and would reduce the number of instructions required
1676 for STMT_INFO itself.
1678 vect_determine_precisions has also determined that the result only
1679 needs min_output_precision bits. Truncating by a factor of N times
1680 requires a tree of N - 1 instructions, so if TYPE is N times wider
1681 than min_output_precision, doing the operation in TYPE and truncating
1682 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1683 In contrast:
1685 - truncating the input to a unary operation and doing the operation
1686 in the new type requires at most N - 1 + 1 = N instructions per
1687 output vector
1689 - doing the same for a binary operation requires at most
1690 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1692 Both unary and binary operations require fewer instructions than
1693 this if the operands were extended from a suitable truncated form.
1694 Thus there is usually nothing to lose by doing operations in
1695 min_output_precision bits, but there can be something to gain. */
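/* A worked example of the counts above (illustrative only): if TYPE is
   32 bits and min_output_precision is 8, then N == 4, so doing the
   operation in TYPE and truncating the result costs 4 + 3 = 7 instructions
   per output vector, against at most 4 for a unary operation and at most 7
   for a binary operation done in the narrower type.  */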
1696 if (!single_use_p)
1697 min_precision = last_stmt_info->min_output_precision;
1698 else
1699 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1701 /* Apply the minimum efficient precision we just calculated. */
1702 if (new_precision < min_precision)
1703 new_precision = min_precision;
1704 if (new_precision >= TYPE_PRECISION (type))
1705 return NULL;
1707 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1709 *type_out = get_vectype_for_scalar_type (vinfo, type);
1710 if (!*type_out)
1711 return NULL;
1713 /* We've found a viable pattern. Get the new type of the operation. */
1714 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1715 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1717 /* If we're truncating an operation, we need to make sure that we
1718 don't introduce new undefined overflow. The codes tested here are
1719 a subset of those accepted by vect_truncatable_operation_p. */
1720 tree op_type = new_type;
1721 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1722 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1723 op_type = build_nonstandard_integer_type (new_precision, true);
1725 /* We specifically don't check here whether the target supports the
1726 new operation, since it might be something that a later pattern
1727 wants to rewrite anyway. If targets have a minimum element size
1728 for some optabs, we should pattern-match smaller ops to larger ops
1729 where beneficial. */
1730 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1731 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1732 if (!new_vectype || !op_vectype)
1733 return NULL;
1735 if (dump_enabled_p ())
1736 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1737 type, new_type);
1739 /* Calculate the rhs operands for an operation on OP_TYPE. */
1740 tree ops[3] = {};
1741 for (unsigned int i = 1; i < first_op; ++i)
1742 ops[i - 1] = gimple_op (last_stmt, i);
1743 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1744 op_type, &unprom[0], op_vectype);
1746 /* Use the operation to produce a result of type OP_TYPE. */
1747 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1748 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1749 ops[0], ops[1], ops[2]);
1750 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1752 if (dump_enabled_p ())
1753 dump_printf_loc (MSG_NOTE, vect_location,
1754 "created pattern stmt: %G", pattern_stmt);
1756 /* Convert back to the original signedness, if OP_TYPE is different
1757 from NEW_TYPE. */
1758 if (op_type != new_type)
1759 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1760 pattern_stmt, op_vectype);
1762 /* Promote the result to the original type. */
1763 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1764 pattern_stmt, new_vectype);
1766 return pattern_stmt;
1769 /* Recognize the following patterns:
1771 ATYPE a; // narrower than TYPE
1772 BTYPE b; // narrower than TYPE
1774 1) Multiply high with scaling
1775 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1776 2) ... or also with rounding
1777 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1779 where only the bottom half of res is used. */
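   /* As a concrete illustration (assuming the target provides the
      corresponding multiply-high optabs):

	short a, b;
	int res1 = ((int) a * (int) b) >> 15;
	int res2 = ((((int) a * (int) b) >> 14) + 1) >> 1;

      where only the low 16 bits of res1 and res2 are used, can become
      IFN_MULHS (a, b) and IFN_MULHRS (a, b) respectively on 16-bit
      elements.  */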
1781 static gimple *
1782 vect_recog_mulhs_pattern (vec_info *vinfo,
1783 stmt_vec_info last_stmt_info, tree *type_out)
1785 /* Check for a right shift. */
1786 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1787 if (!last_stmt
1788 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1789 return NULL;
1791 /* Check that the shift result is wider than the users of the
1792 result need (i.e. that narrowing would be a natural choice). */
1793 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1794 unsigned int target_precision
1795 = vect_element_precision (last_stmt_info->min_output_precision);
1796 if (!INTEGRAL_TYPE_P (lhs_type)
1797 || target_precision >= TYPE_PRECISION (lhs_type))
1798 return NULL;
1800 /* Look through any change in sign on the outer shift input. */
1801 vect_unpromoted_value unprom_rshift_input;
1802 tree rshift_input = vect_look_through_possible_promotion
1803 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1804 if (!rshift_input
1805 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1806 != TYPE_PRECISION (lhs_type))
1807 return NULL;
1809 /* Get the definition of the shift input. */
1810 stmt_vec_info rshift_input_stmt_info
1811 = vect_get_internal_def (vinfo, rshift_input);
1812 if (!rshift_input_stmt_info)
1813 return NULL;
1814 gassign *rshift_input_stmt
1815 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1816 if (!rshift_input_stmt)
1817 return NULL;
1819 stmt_vec_info mulh_stmt_info;
1820 tree scale_term;
1821 internal_fn ifn;
1822 unsigned int expect_offset;
1824 /* Check for the presence of the rounding term. */
1825 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1827 /* Check that the outer shift was by 1. */
1828 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1829 return NULL;
1831 /* Check that the second operand of the PLUS_EXPR is 1. */
1832 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1833 return NULL;
1835 /* Look through any change in sign on the addition input. */
1836 vect_unpromoted_value unprom_plus_input;
1837 tree plus_input = vect_look_through_possible_promotion
1838 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1839 if (!plus_input
1840 || TYPE_PRECISION (TREE_TYPE (plus_input))
1841 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1842 return NULL;
1844 /* Get the definition of the multiply-high-scale part. */
1845 stmt_vec_info plus_input_stmt_info
1846 = vect_get_internal_def (vinfo, plus_input);
1847 if (!plus_input_stmt_info)
1848 return NULL;
1849 gassign *plus_input_stmt
1850 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1851 if (!plus_input_stmt
1852 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1853 return NULL;
1855 /* Look through any change in sign on the scaling input. */
1856 vect_unpromoted_value unprom_scale_input;
1857 tree scale_input = vect_look_through_possible_promotion
1858 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1859 if (!scale_input
1860 || TYPE_PRECISION (TREE_TYPE (scale_input))
1861 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1862 return NULL;
1864 /* Get the definition of the multiply-high part. */
1865 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1866 if (!mulh_stmt_info)
1867 return NULL;
1869 /* Get the scaling term. */
1870 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1872 expect_offset = target_precision + 2;
1873 ifn = IFN_MULHRS;
1875 else
1877 mulh_stmt_info = rshift_input_stmt_info;
1878 scale_term = gimple_assign_rhs2 (last_stmt);
1880 expect_offset = target_precision + 1;
1881 ifn = IFN_MULHS;
1884 /* Check that the scaling factor is correct. */
1885 if (TREE_CODE (scale_term) != INTEGER_CST
1886 || wi::to_widest (scale_term) + expect_offset
1887 != TYPE_PRECISION (lhs_type))
1888 return NULL;
1890 /* Check whether the scaling input term can be seen as two widened
1891 inputs multiplied together. */
1892 vect_unpromoted_value unprom_mult[2];
1893 tree new_type;
1894 unsigned int nops
1895 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1896 false, 2, unprom_mult, &new_type);
1897 if (nops != 2)
1898 return NULL;
1900 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1902 /* Adjust output precision. */
1903 if (TYPE_PRECISION (new_type) < target_precision)
1904 new_type = build_nonstandard_integer_type
1905 (target_precision, TYPE_UNSIGNED (new_type));
1907 /* Check for target support. */
1908 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1909 if (!new_vectype
1910 || !direct_internal_fn_supported_p
1911 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1912 return NULL;
1914 /* The IR requires a valid vector type for the cast result, even though
1915 it's likely to be discarded. */
1916 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1917 if (!*type_out)
1918 return NULL;
1920 /* Generate the IFN_MULHRS call. */
1921 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1922 tree new_ops[2];
1923 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
1924 unprom_mult, new_vectype);
1925 gcall *mulhrs_stmt
1926 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1927 gimple_call_set_lhs (mulhrs_stmt, new_var);
1928 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_NOTE, vect_location,
1932 "created pattern stmt: %G", mulhrs_stmt);
1934 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
1935 mulhrs_stmt, new_vectype);
1938 /* Recognize the patterns:
1940 ATYPE a; // narrower than TYPE
1941 BTYPE b; // narrower than TYPE
1942 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1943 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1945 where only the bottom half of avg is used. Try to transform them into:
1947 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1948 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1950 followed by:
1952 TYPE avg = (TYPE) avg';
1954 where NTYPE is no wider than half of TYPE. Since only the bottom half
1955 of avg is used, all or part of the cast of avg' should become redundant.
1957 If there is no target support available, generate code to distribute rshift
1958 over plus and add a carry. */
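   /* One possible instantiation, with 8-bit inputs:

	unsigned char a, b;
	int avg = ((int) a + (int) b + 1) >> 1;

      where only the low 8 bits of avg are used, becomes

	unsigned char tmp = .AVG_CEIL (a, b);
	int avg = (int) tmp;  */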
1960 static gimple *
1961 vect_recog_average_pattern (vec_info *vinfo,
1962 stmt_vec_info last_stmt_info, tree *type_out)
1964 /* Check for a shift right by one bit. */
1965 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1966 if (!last_stmt
1967 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1968 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1969 return NULL;
1971 /* Check that the shift result is wider than the users of the
1972 result need (i.e. that narrowing would be a natural choice). */
1973 tree lhs = gimple_assign_lhs (last_stmt);
1974 tree type = TREE_TYPE (lhs);
1975 unsigned int target_precision
1976 = vect_element_precision (last_stmt_info->min_output_precision);
1977 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1978 return NULL;
1980 /* Look through any change in sign on the shift input. */
1981 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1982 vect_unpromoted_value unprom_plus;
1983 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1984 &unprom_plus);
1985 if (!rshift_rhs
1986 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1987 return NULL;
1989 /* Get the definition of the shift input. */
1990 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1991 if (!plus_stmt_info)
1992 return NULL;
1994 /* Check whether the shift input can be seen as a tree of additions on
1995 2 or 3 widened inputs.
1997 Note that the pattern should be a win even if the result of one or
1998 more additions is reused elsewhere: if the pattern matches, we'd be
1999 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2000 internal_fn ifn = IFN_AVG_FLOOR;
2001 vect_unpromoted_value unprom[3];
2002 tree new_type;
2003 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2004 WIDEN_PLUS_EXPR, false, 3,
2005 unprom, &new_type);
2006 if (nops == 0)
2007 return NULL;
2008 if (nops == 3)
2010 /* Check that one operand is 1. */
2011 unsigned int i;
2012 for (i = 0; i < 3; ++i)
2013 if (integer_onep (unprom[i].op))
2014 break;
2015 if (i == 3)
2016 return NULL;
2017 /* Throw away the 1 operand and keep the other two. */
2018 if (i < 2)
2019 unprom[i] = unprom[2];
2020 ifn = IFN_AVG_CEIL;
2023 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2025 /* We know that:
2027 (a) the operation can be viewed as:
2029 TYPE widened0 = (TYPE) UNPROM[0];
2030 TYPE widened1 = (TYPE) UNPROM[1];
2031 TYPE tmp1 = widened0 + widened1 {+ 1};
2032 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2034 (b) the first two statements are equivalent to:
2036 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2037 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2039 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2040 where sensible;
2042 (d) all the operations can be performed correctly at twice the width of
2043 NEW_TYPE, due to the nature of the average operation; and
2045 (e) users of the result of the right shift need only TARGET_PRECISION
2046 bits, where TARGET_PRECISION is no more than half of TYPE's
2047 precision.
2049 Under these circumstances, the only situation in which NEW_TYPE
2050 could be narrower than TARGET_PRECISION is if widened0, widened1
2051 and an addition result are all used more than once. Thus we can
2052 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2053 as "free", whereas widening the result of the average instruction
2054 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2055 therefore better not to go narrower than TARGET_PRECISION. */
2056 if (TYPE_PRECISION (new_type) < target_precision)
2057 new_type = build_nonstandard_integer_type (target_precision,
2058 TYPE_UNSIGNED (new_type));
2060 /* Check for target support. */
2061 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2062 if (!new_vectype)
2063 return NULL;
2065 bool fallback_p = false;
2067 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2069 else if (TYPE_UNSIGNED (new_type)
2070 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2071 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2072 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2073 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2074 fallback_p = true;
2075 else
2076 return NULL;
2078 /* The IR requires a valid vector type for the cast result, even though
2079 it's likely to be discarded. */
2080 *type_out = get_vectype_for_scalar_type (vinfo, type);
2081 if (!*type_out)
2082 return NULL;
2084 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2085 tree new_ops[2];
2086 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2087 unprom, new_vectype);
2089 if (fallback_p)
2091 /* As a fallback, generate code for the following sequence:
2093 shifted_op0 = new_ops[0] >> 1;
2094 shifted_op1 = new_ops[1] >> 1;
2095 sum_of_shifted = shifted_op0 + shifted_op1;
2096 unmasked_carry = new_ops[0] and/or new_ops[1];
2097 carry = unmasked_carry & 1;
2098 new_var = sum_of_shifted + carry;
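	 (This gives the exact result because
	 (a + b) >> 1 == (a >> 1) + (b >> 1) + ((a & b) & 1) and
	 (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1),
	 so no intermediate value can overflow NEW_TYPE.)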
2101 tree one_cst = build_one_cst (new_type);
2102 gassign *g;
2104 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2105 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2106 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2108 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2109 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2110 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2112 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2113 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2114 shifted_op0, shifted_op1);
2115 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2117 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2118 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2119 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2120 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2122 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2123 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2124 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2126 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2127 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2130 /* Generate the IFN_AVG* call. */
2131 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2132 new_ops[1]);
2133 gimple_call_set_lhs (average_stmt, new_var);
2134 gimple_set_location (average_stmt, gimple_location (last_stmt));
2136 if (dump_enabled_p ())
2137 dump_printf_loc (MSG_NOTE, vect_location,
2138 "created pattern stmt: %G", average_stmt);
2140 return vect_convert_output (vinfo, last_stmt_info,
2141 type, average_stmt, new_vectype);
2144 /* Recognize cases in which the input to a cast is wider than its
2145 output, and the input is fed by a widening operation. Fold this
2146 by removing the unnecessary intermediate widening. E.g.:
2148 unsigned char a;
2149 unsigned int b = (unsigned int) a;
2150 unsigned short c = (unsigned short) b;
2154 unsigned short c = (unsigned short) a;
2156 Although this is rare in input IR, it is an expected side-effect
2157 of the over-widening pattern above.
2159 This is beneficial also for integer-to-float conversions, if the
2160 widened integer has more bits than the float, and if the unwidened
2161 input doesn't. */
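   /* For example (one possible case):

	unsigned char a;
	unsigned long long b = (unsigned long long) a;
	float c = (float) b;

      can be folded to c = (float) a, replacing a 64-bit-to-float
      conversion (which may not be directly vectorizable) with an
      8-bit-to-float one.  */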
2163 static gimple *
2164 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2165 stmt_vec_info last_stmt_info, tree *type_out)
2167 /* Check for a cast, including an integer-to-float conversion. */
2168 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2169 if (!last_stmt)
2170 return NULL;
2171 tree_code code = gimple_assign_rhs_code (last_stmt);
2172 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2173 return NULL;
2175 /* Make sure that the rhs is a scalar with a natural bitsize. */
2176 tree lhs = gimple_assign_lhs (last_stmt);
2177 if (!lhs)
2178 return NULL;
2179 tree lhs_type = TREE_TYPE (lhs);
2180 scalar_mode lhs_mode;
2181 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2182 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2183 return NULL;
2185 /* Check for a narrowing operation (from a vector point of view). */
2186 tree rhs = gimple_assign_rhs1 (last_stmt);
2187 tree rhs_type = TREE_TYPE (rhs);
2188 if (!INTEGRAL_TYPE_P (rhs_type)
2189 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2190 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2191 return NULL;
2193 /* Try to find an unpromoted input. */
2194 vect_unpromoted_value unprom;
2195 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2196 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2197 return NULL;
2199 /* If the bits above RHS_TYPE matter, make sure that they're the
2200 same when extending from UNPROM as they are when extending from RHS. */
2201 if (!INTEGRAL_TYPE_P (lhs_type)
2202 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2203 return NULL;
2205 /* We can get the same result by casting UNPROM directly, to avoid
2206 the unnecessary widening and narrowing. */
2207 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2209 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2210 if (!*type_out)
2211 return NULL;
2213 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2214 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2215 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2217 return pattern_stmt;
2220 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2221 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2223 static gimple *
2224 vect_recog_widen_shift_pattern (vec_info *vinfo,
2225 stmt_vec_info last_stmt_info, tree *type_out)
2227 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2228 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2229 "vect_recog_widen_shift_pattern");
2232 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2234 type a_t, b_t, c_t;
2236 S0 a_t = b_t r<< c_t;
2238 Input/Output:
2240 * STMT_VINFO: The stmt from which the pattern search begins,
2241 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2242 with a sequence:
2244 S1 d_t = -c_t;
2245 S2 e_t = d_t & (B - 1);
2246 S3 f_t = b_t << c_t;
2247 S4 g_t = b_t >> e_t;
2248 S0 a_t = f_t | g_t;
2250 where B is element bitsize of type.
2252 Output:
2254 * TYPE_OUT: The type of the output of this pattern.
2256 * Return value: A new stmt that will be used to replace the rotate
2257 S0 stmt. */
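   /* For instance, with B == 32 the masked negation computes
      e_t = (-c_t) & 31, which is 32 - c_t for any nonzero c_t and 0 for
      c_t == 0, so the right shift count never reaches the out-of-range
      value 32 and the OR still yields the original value when c_t is 0.  */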
2259 static gimple *
2260 vect_recog_rotate_pattern (vec_info *vinfo,
2261 stmt_vec_info stmt_vinfo, tree *type_out)
2263 gimple *last_stmt = stmt_vinfo->stmt;
2264 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2265 gimple *pattern_stmt, *def_stmt;
2266 enum tree_code rhs_code;
2267 enum vect_def_type dt;
2268 optab optab1, optab2;
2269 edge ext_def = NULL;
2270 bool bswap16_p = false;
2272 if (is_gimple_assign (last_stmt))
2274 rhs_code = gimple_assign_rhs_code (last_stmt);
2275 switch (rhs_code)
2277 case LROTATE_EXPR:
2278 case RROTATE_EXPR:
2279 break;
2280 default:
2281 return NULL;
2284 lhs = gimple_assign_lhs (last_stmt);
2285 oprnd0 = gimple_assign_rhs1 (last_stmt);
2286 type = TREE_TYPE (oprnd0);
2287 oprnd1 = gimple_assign_rhs2 (last_stmt);
2289 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2291 /* __builtin_bswap16 (x) is another form of x r>> 8.
2292 The vectorizer has bswap support, but only if the argument isn't
2293 promoted. */
2294 lhs = gimple_call_lhs (last_stmt);
2295 oprnd0 = gimple_call_arg (last_stmt, 0);
2296 type = TREE_TYPE (oprnd0);
2297 if (!lhs
2298 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2299 || TYPE_PRECISION (type) <= 16
2300 || TREE_CODE (oprnd0) != SSA_NAME
2301 || BITS_PER_UNIT != 8
2302 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2303 return NULL;
2305 stmt_vec_info def_stmt_info;
2306 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2307 return NULL;
2309 if (dt != vect_internal_def)
2310 return NULL;
2312 if (gimple_assign_cast_p (def_stmt))
2314 def = gimple_assign_rhs1 (def_stmt);
2315 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2316 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2317 oprnd0 = def;
2320 type = TREE_TYPE (lhs);
2321 vectype = get_vectype_for_scalar_type (vinfo, type);
2322 if (vectype == NULL_TREE)
2323 return NULL;
2325 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2327 /* The encoding uses one stepped pattern for each byte in the
2328 16-bit word. */
2329 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2330 for (unsigned i = 0; i < 3; ++i)
2331 for (unsigned j = 0; j < 2; ++j)
2332 elts.quick_push ((i + 1) * 2 - j - 1);
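	 /* The series pushed here is 1, 0, 3, 2, 5, 4, i.e. a permutation
	    that swaps the two bytes of each 16-bit element; the
	    vec_perm_indices constructor below extends the encoding to the
	    full number of vector elements.  */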
2334 vec_perm_indices indices (elts, 1,
2335 TYPE_VECTOR_SUBPARTS (char_vectype));
2336 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2338 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2339 undo the argument promotion. */
2340 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2342 def = vect_recog_temp_ssa_var (type, NULL);
2343 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2344 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2345 oprnd0 = def;
2348 /* Pattern detected. */
2349 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2351 *type_out = vectype;
2353 /* Pattern supported. Create a stmt to be used to replace the
2354 pattern, with the unpromoted argument. */
2355 var = vect_recog_temp_ssa_var (type, NULL);
2356 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2357 1, oprnd0);
2358 gimple_call_set_lhs (pattern_stmt, var);
2359 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2360 gimple_call_fntype (last_stmt));
2361 return pattern_stmt;
2365 oprnd1 = build_int_cst (integer_type_node, 8);
2366 rhs_code = LROTATE_EXPR;
2367 bswap16_p = true;
2369 else
2370 return NULL;
2372 if (TREE_CODE (oprnd0) != SSA_NAME
2373 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2374 || !INTEGRAL_TYPE_P (type)
2375 || !TYPE_UNSIGNED (type))
2376 return NULL;
2378 stmt_vec_info def_stmt_info;
2379 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2380 return NULL;
2382 if (dt != vect_internal_def
2383 && dt != vect_constant_def
2384 && dt != vect_external_def)
2385 return NULL;
2387 vectype = get_vectype_for_scalar_type (vinfo, type);
2388 if (vectype == NULL_TREE)
2389 return NULL;
2391 /* If vector/vector or vector/scalar rotate is supported by the target,
2392 don't do anything here. */
2393 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2394 if (optab1
2395 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2397 use_rotate:
2398 if (bswap16_p)
2400 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2402 def = vect_recog_temp_ssa_var (type, NULL);
2403 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2404 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2405 oprnd0 = def;
2408 /* Pattern detected. */
2409 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2411 *type_out = vectype;
2413 /* Pattern supported. Create a stmt to be used to replace the
2414 pattern. */
2415 var = vect_recog_temp_ssa_var (type, NULL);
2416 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2417 oprnd1);
2418 return pattern_stmt;
2420 return NULL;
2423 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2425 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2426 if (optab2
2427 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2428 goto use_rotate;
2431 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2432 don't do anything here either. */
2433 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2434 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2435 if (!optab1
2436 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2437 || !optab2
2438 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2440 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2441 return NULL;
2442 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2443 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2444 if (!optab1
2445 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2446 || !optab2
2447 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2448 return NULL;
2451 *type_out = vectype;
2453 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2455 def = vect_recog_temp_ssa_var (type, NULL);
2456 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2457 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2458 oprnd0 = def;
2461 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2462 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2464 def = NULL_TREE;
2465 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2466 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2467 def = oprnd1;
2468 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2470 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2471 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2472 && TYPE_PRECISION (TREE_TYPE (rhs1))
2473 == TYPE_PRECISION (type))
2474 def = rhs1;
2477 if (def == NULL_TREE)
2479 def = vect_recog_temp_ssa_var (type, NULL);
2480 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2481 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2483 stype = TREE_TYPE (def);
2485 if (TREE_CODE (def) == INTEGER_CST)
2487 if (!tree_fits_uhwi_p (def)
2488 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2489 || integer_zerop (def))
2490 return NULL;
2491 def2 = build_int_cst (stype,
2492 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2494 else
2496 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2498 if (vecstype == NULL_TREE)
2499 return NULL;
2500 def2 = vect_recog_temp_ssa_var (stype, NULL);
2501 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2502 if (ext_def)
2504 basic_block new_bb
2505 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2506 gcc_assert (!new_bb);
2508 else
2509 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2511 def2 = vect_recog_temp_ssa_var (stype, NULL);
2512 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2513 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2514 gimple_assign_lhs (def_stmt), mask);
2515 if (ext_def)
2517 basic_block new_bb
2518 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2519 gcc_assert (!new_bb);
2521 else
2522 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2525 var1 = vect_recog_temp_ssa_var (type, NULL);
2526 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2527 ? LSHIFT_EXPR : RSHIFT_EXPR,
2528 oprnd0, def);
2529 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2531 var2 = vect_recog_temp_ssa_var (type, NULL);
2532 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2533 ? RSHIFT_EXPR : LSHIFT_EXPR,
2534 oprnd0, def2);
2535 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2537 /* Pattern detected. */
2538 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2540 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2541 var = vect_recog_temp_ssa_var (type, NULL);
2542 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2544 return pattern_stmt;
2547 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2548 vectorized:
2550 type a_t;
2551 TYPE b_T, res_T;
2553 S1 a_t = ;
2554 S2 b_T = ;
2555 S3 res_T = b_T op a_t;
2557 where type 'TYPE' is a type with a different size from 'type',
2558 and op is <<, >> or rotate.
2560 Also detect cases:
2562 type a_t;
2563 TYPE b_T, c_T, res_T;
2565 S0 c_T = ;
2566 S1 a_t = (type) c_T;
2567 S2 b_T = ;
2568 S3 res_T = b_T op a_t;
2570 Input/Output:
2572 * STMT_VINFO: The stmt from which the pattern search begins,
2573 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2574 with a shift/rotate which has same type on both operands, in the
2575 second case just b_T op c_T, in the first case with added cast
2576 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2578 Output:
2580 * TYPE_OUT: The type of the output of this pattern.
2582 * Return value: A new stmt that will be used to replace the shift/rotate
2583 S3 stmt. */
2585 static gimple *
2586 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2587 stmt_vec_info stmt_vinfo,
2588 tree *type_out)
2590 gimple *last_stmt = stmt_vinfo->stmt;
2591 tree oprnd0, oprnd1, lhs, var;
2592 gimple *pattern_stmt;
2593 enum tree_code rhs_code;
2595 if (!is_gimple_assign (last_stmt))
2596 return NULL;
2598 rhs_code = gimple_assign_rhs_code (last_stmt);
2599 switch (rhs_code)
2601 case LSHIFT_EXPR:
2602 case RSHIFT_EXPR:
2603 case LROTATE_EXPR:
2604 case RROTATE_EXPR:
2605 break;
2606 default:
2607 return NULL;
2610 lhs = gimple_assign_lhs (last_stmt);
2611 oprnd0 = gimple_assign_rhs1 (last_stmt);
2612 oprnd1 = gimple_assign_rhs2 (last_stmt);
2613 if (TREE_CODE (oprnd0) != SSA_NAME
2614 || TREE_CODE (oprnd1) != SSA_NAME
2615 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2616 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2617 || TYPE_PRECISION (TREE_TYPE (lhs))
2618 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2619 return NULL;
2621 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2622 if (!def_vinfo)
2623 return NULL;
2625 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2626 if (*type_out == NULL_TREE)
2627 return NULL;
2629 tree def = NULL_TREE;
2630 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2631 if (def_stmt && gimple_assign_cast_p (def_stmt))
2633 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2634 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2635 && TYPE_PRECISION (TREE_TYPE (rhs1))
2636 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2638 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2639 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2640 def = rhs1;
2641 else
2643 tree mask
2644 = build_low_bits_mask (TREE_TYPE (rhs1),
2645 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2646 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2647 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2648 tree vecstype = get_vectype_for_scalar_type (vinfo,
2649 TREE_TYPE (rhs1));
2650 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2655 if (def == NULL_TREE)
2657 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2658 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2659 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2662 /* Pattern detected. */
2663 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2665 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2666 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2667 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2669 return pattern_stmt;
2672 /* Return true iff the target has a vector optab implementing the operation
2673 CODE on type VECTYPE. */
2675 static bool
2676 target_has_vecop_for_code (tree_code code, tree vectype)
2678 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2679 return voptab
2680 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2683 /* Verify that the target has optabs of VECTYPE to perform all the steps
2684 needed by the multiplication-by-immediate synthesis algorithm described by
2685 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2686 present. Return true iff the target supports all the steps. */
2688 static bool
2689 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2690 tree vectype, bool synth_shift_p)
2692 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2693 return false;
2695 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2696 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2698 if (var == negate_variant
2699 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2700 return false;
2702 /* If we must synthesize shifts with additions make sure that vector
2703 addition is available. */
2704 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2705 return false;
2707 for (int i = 1; i < alg->ops; i++)
2709 switch (alg->op[i])
2711 case alg_shift:
2712 break;
2713 case alg_add_t_m2:
2714 case alg_add_t2_m:
2715 case alg_add_factor:
2716 if (!supports_vplus)
2717 return false;
2718 break;
2719 case alg_sub_t_m2:
2720 case alg_sub_t2_m:
2721 case alg_sub_factor:
2722 if (!supports_vminus)
2723 return false;
2724 break;
2725 case alg_unknown:
2726 case alg_m:
2727 case alg_zero:
2728 case alg_impossible:
2729 return false;
2730 default:
2731 gcc_unreachable ();
2735 return true;
2738 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2739 putting the final result in DEST. Append all statements but the last into
2740 VINFO. Return the last statement. */
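/* For example, a shift by 3 becomes three self-additions:
   t1 = op + op;  t2 = t1 + t1;  dest = t2 + t2.  */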
2742 static gimple *
2743 synth_lshift_by_additions (vec_info *vinfo,
2744 tree dest, tree op, HOST_WIDE_INT amnt,
2745 stmt_vec_info stmt_info)
2747 HOST_WIDE_INT i;
2748 tree itype = TREE_TYPE (op);
2749 tree prev_res = op;
2750 gcc_assert (amnt >= 0);
2751 for (i = 0; i < amnt; i++)
2753 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2754 : dest;
2755 gimple *stmt
2756 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2757 prev_res = tmp_var;
2758 if (i < amnt - 1)
2759 append_pattern_def_seq (vinfo, stmt_info, stmt);
2760 else
2761 return stmt;
2763 gcc_unreachable ();
2764 return NULL;
2767 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2768 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2769 the process if necessary. Append the resulting assignment statements
2770 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2771 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2772 left shifts using additions. */
2774 static tree
2775 apply_binop_and_append_stmt (vec_info *vinfo,
2776 tree_code code, tree op1, tree op2,
2777 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2779 if (integer_zerop (op2)
2780 && (code == LSHIFT_EXPR
2781 || code == PLUS_EXPR))
2783 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2784 return op1;
2787 gimple *stmt;
2788 tree itype = TREE_TYPE (op1);
2789 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2791 if (code == LSHIFT_EXPR
2792 && synth_shift_p)
2794 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2795 TREE_INT_CST_LOW (op2), stmt_vinfo);
2796 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2797 return tmp_var;
2800 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2801 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2802 return tmp_var;
2805 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2806 and simple arithmetic operations to be vectorized. Record the statements
2807 produced in STMT_VINFO and return the last statement in the sequence or
2808 NULL if it's not possible to synthesize such a multiplication.
2809 This function mirrors the behavior of expand_mult_const in expmed.c but
2810 works on tree-ssa form. */
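/* As an illustration (the exact sequence depends on what
   choose_mult_variant selects for the target), a multiplication by 10
   might become

     t1 = op << 2;
     t2 = t1 + op;
     res = t2 << 1;

   i.e. op * 5 doubled, with the shifts themselves synthesized from
   additions if the target lacks vector shifts.  */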
2812 static gimple *
2813 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
2814 stmt_vec_info stmt_vinfo)
2816 tree itype = TREE_TYPE (op);
2817 machine_mode mode = TYPE_MODE (itype);
2818 struct algorithm alg;
2819 mult_variant variant;
2820 if (!tree_fits_shwi_p (val))
2821 return NULL;
2823 /* Multiplication synthesis by shifts, adds and subs can introduce
2824 signed overflow where the original operation didn't. Perform the
2825 operations on an unsigned type and cast back to avoid this.
2826 In the future we may want to relax this for synthesis algorithms
2827 that we can prove do not cause unexpected overflow. */
2828 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2830 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2832 /* Targets that don't support vector shifts but support vector additions
2833 can synthesize shifts that way. */
2834 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2836 HOST_WIDE_INT hwval = tree_to_shwi (val);
2837 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2838 The vectorizer's benefit analysis will decide whether it's beneficial
2839 to do this. */
2840 bool possible = choose_mult_variant (mode, hwval, &alg,
2841 &variant, MAX_COST);
2842 if (!possible)
2843 return NULL;
2845 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2847 if (!vectype
2848 || !target_supports_mult_synth_alg (&alg, variant,
2849 vectype, synth_shift_p))
2850 return NULL;
2852 tree accumulator;
2854 /* Clear out the sequence of statements so we can populate it below. */
2855 gimple *stmt = NULL;
2857 if (cast_to_unsigned_p)
2859 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2860 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2861 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2862 op = tmp_op;
2865 if (alg.op[0] == alg_zero)
2866 accumulator = build_int_cst (multtype, 0);
2867 else
2868 accumulator = op;
2870 bool needs_fixup = (variant == negate_variant)
2871 || (variant == add_variant);
2873 for (int i = 1; i < alg.ops; i++)
2875 tree shft_log = build_int_cst (multtype, alg.log[i]);
2876 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2877 tree tmp_var = NULL_TREE;
2879 switch (alg.op[i])
2881 case alg_shift:
2882 if (synth_shift_p)
2883 stmt
2884 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
2885 alg.log[i], stmt_vinfo);
2886 else
2887 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2888 shft_log);
2889 break;
2890 case alg_add_t_m2:
2891 tmp_var
2892 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
2893 stmt_vinfo, synth_shift_p);
2894 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2895 tmp_var);
2896 break;
2897 case alg_sub_t_m2:
2898 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
2899 shft_log, stmt_vinfo,
2900 synth_shift_p);
2901 /* In some algorithms the first step involves zeroing the
2902 accumulator.  If subtracting from such an accumulator,
2903 just emit the negation directly. */
2904 if (integer_zerop (accumulator))
2905 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2906 else
2907 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2908 tmp_var);
2909 break;
2910 case alg_add_t2_m:
2911 tmp_var
2912 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2913 shft_log, stmt_vinfo, synth_shift_p);
2914 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2915 break;
2916 case alg_sub_t2_m:
2917 tmp_var
2918 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2919 shft_log, stmt_vinfo, synth_shift_p);
2920 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2921 break;
2922 case alg_add_factor:
2923 tmp_var
2924 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2925 shft_log, stmt_vinfo, synth_shift_p);
2926 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2927 tmp_var);
2928 break;
2929 case alg_sub_factor:
2930 tmp_var
2931 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2932 shft_log, stmt_vinfo, synth_shift_p);
2933 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2934 accumulator);
2935 break;
2936 default:
2937 gcc_unreachable ();
2939 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2940 but rather return it directly. */
2942 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2943 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2944 accumulator = accum_tmp;
2946 if (variant == negate_variant)
2948 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2949 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2950 accumulator = accum_tmp;
2951 if (cast_to_unsigned_p)
2952 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2954 else if (variant == add_variant)
2956 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2957 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2958 accumulator = accum_tmp;
2959 if (cast_to_unsigned_p)
2960 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2962 /* Move back to a signed type if needed. */
2963 if (cast_to_unsigned_p)
2965 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2966 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2969 return stmt;
2972 /* Detect multiplication by a constant and convert it into a sequence of
2973 shifts, additions, subtractions and negations.  We reuse the
2974 choose_mult_variant algorithms from expmed.c.
2976 Input/Output:
2978 STMT_VINFO: The stmt from which the pattern search begins,
2979 i.e. the mult stmt.
2981 Output:
2983 * TYPE_OUT: The type of the output of this pattern.
2985 * Return value: A new stmt that will be used to replace
2986 the multiplication. */
2988 static gimple *
2989 vect_recog_mult_pattern (vec_info *vinfo,
2990 stmt_vec_info stmt_vinfo, tree *type_out)
2992 gimple *last_stmt = stmt_vinfo->stmt;
2993 tree oprnd0, oprnd1, vectype, itype;
2994 gimple *pattern_stmt;
2996 if (!is_gimple_assign (last_stmt))
2997 return NULL;
2999 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3000 return NULL;
3002 oprnd0 = gimple_assign_rhs1 (last_stmt);
3003 oprnd1 = gimple_assign_rhs2 (last_stmt);
3004 itype = TREE_TYPE (oprnd0);
3006 if (TREE_CODE (oprnd0) != SSA_NAME
3007 || TREE_CODE (oprnd1) != INTEGER_CST
3008 || !INTEGRAL_TYPE_P (itype)
3009 || !type_has_mode_precision_p (itype))
3010 return NULL;
3012 vectype = get_vectype_for_scalar_type (vinfo, itype);
3013 if (vectype == NULL_TREE)
3014 return NULL;
3016 /* If the target can handle vectorized multiplication natively,
3017 don't attempt to optimize this. */
3018 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3019 if (mul_optab != unknown_optab)
3021 machine_mode vec_mode = TYPE_MODE (vectype);
3022 int icode = (int) optab_handler (mul_optab, vec_mode);
3023 if (icode != CODE_FOR_nothing)
3024 return NULL;
3027 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3028 oprnd0, oprnd1, stmt_vinfo);
3029 if (!pattern_stmt)
3030 return NULL;
3032 /* Pattern detected. */
3033 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3035 *type_out = vectype;
3037 return pattern_stmt;
3040 /* Detect a signed division by a constant that wouldn't be
3041 otherwise vectorized:
3043 type a_t, b_t;
3045 S1 a_t = b_t / N;
3047 where type 'type' is an integral type and N is a constant.
3049 Similarly handle modulo by a constant:
3051 S4 a_t = b_t % N;
3053 Input/Output:
3055 * STMT_VINFO: The stmt from which the pattern search begins,
3056 i.e. the division stmt.  If N is a power-of-two constant and the
3057 type is signed, S1 is replaced by:
3058 S3 y_t = b_t < 0 ? N - 1 : 0;
3059 S2 x_t = b_t + y_t;
3060 S1' a_t = x_t >> log2 (N);
3062 If N is a power-of-two constant and the type is signed, S4 is
3063 replaced by (where the *_T temporaries have unsigned type):
3064 S9 y_T = b_t < 0 ? -1U : 0U;
3065 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3066 S7 z_t = (type) z_T;
3067 S6 w_t = b_t + z_t;
3068 S5 x_t = w_t & (N - 1);
3069 S4' a_t = x_t - z_t;
3071 Output:
3073 * TYPE_OUT: The type of the output of this pattern.
3075 * Return value: A new stmt that will be used to replace the division
3076 S1 or modulo S4 stmt. */
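/* For example, for a signed 32-bit b_t, S1 with N == 4 becomes

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   which, unlike a plain arithmetic shift, rounds towards zero for
   negative b_t as C division requires.  */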
3078 static gimple *
3079 vect_recog_divmod_pattern (vec_info *vinfo,
3080 stmt_vec_info stmt_vinfo, tree *type_out)
3082 gimple *last_stmt = stmt_vinfo->stmt;
3083 tree oprnd0, oprnd1, vectype, itype, cond;
3084 gimple *pattern_stmt, *def_stmt;
3085 enum tree_code rhs_code;
3086 optab optab;
3087 tree q;
3088 int dummy_int, prec;
3090 if (!is_gimple_assign (last_stmt))
3091 return NULL;
3093 rhs_code = gimple_assign_rhs_code (last_stmt);
3094 switch (rhs_code)
3096 case TRUNC_DIV_EXPR:
3097 case EXACT_DIV_EXPR:
3098 case TRUNC_MOD_EXPR:
3099 break;
3100 default:
3101 return NULL;
3104 oprnd0 = gimple_assign_rhs1 (last_stmt);
3105 oprnd1 = gimple_assign_rhs2 (last_stmt);
3106 itype = TREE_TYPE (oprnd0);
3107 if (TREE_CODE (oprnd0) != SSA_NAME
3108 || TREE_CODE (oprnd1) != INTEGER_CST
3109 || TREE_CODE (itype) != INTEGER_TYPE
3110 || !type_has_mode_precision_p (itype))
3111 return NULL;
3113 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3114 vectype = get_vectype_for_scalar_type (vinfo, itype);
3115 if (vectype == NULL_TREE)
3116 return NULL;
3118 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3120 /* If the target can handle vectorized division or modulo natively,
3121 don't attempt to optimize this, since native division is likely
3122 to give smaller code. */
3123 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3124 if (optab != unknown_optab)
3126 machine_mode vec_mode = TYPE_MODE (vectype);
3127 int icode = (int) optab_handler (optab, vec_mode);
3128 if (icode != CODE_FOR_nothing)
3129 return NULL;
3133 prec = TYPE_PRECISION (itype);
3134 if (integer_pow2p (oprnd1))
3136 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3137 return NULL;
3139 /* Pattern detected. */
3140 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3142 *type_out = vectype;
3144 /* Check if the target supports this internal function. */
3145 internal_fn ifn = IFN_DIV_POW2;
3146 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3148 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3150 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3151 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3152 gimple_call_set_lhs (div_stmt, var_div);
3154 if (rhs_code == TRUNC_MOD_EXPR)
3156 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3157 def_stmt
3158 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3159 LSHIFT_EXPR, var_div, shift);
3160 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3161 pattern_stmt
3162 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3163 MINUS_EXPR, oprnd0,
3164 gimple_assign_lhs (def_stmt));
3166 else
3167 pattern_stmt = div_stmt;
3168 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3170 return pattern_stmt;
3173 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3174 build_int_cst (itype, 0));
3175 if (rhs_code == TRUNC_DIV_EXPR
3176 || rhs_code == EXACT_DIV_EXPR)
3178 tree var = vect_recog_temp_ssa_var (itype, NULL);
3179 tree shift;
3180 def_stmt
3181 = gimple_build_assign (var, COND_EXPR, cond,
3182 fold_build2 (MINUS_EXPR, itype, oprnd1,
3183 build_int_cst (itype, 1)),
3184 build_int_cst (itype, 0));
3185 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3186 var = vect_recog_temp_ssa_var (itype, NULL);
3187 def_stmt
3188 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3189 gimple_assign_lhs (def_stmt));
3190 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3192 shift = build_int_cst (itype, tree_log2 (oprnd1));
3193 pattern_stmt
3194 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3195 RSHIFT_EXPR, var, shift);
3197 else
3199 tree signmask;
3200 if (compare_tree_int (oprnd1, 2) == 0)
3202 signmask = vect_recog_temp_ssa_var (itype, NULL);
3203 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3204 build_int_cst (itype, 1),
3205 build_int_cst (itype, 0));
3206 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3208 else
3210 tree utype
3211 = build_nonstandard_integer_type (prec, 1);
3212 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3213 tree shift
3214 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3215 - tree_log2 (oprnd1));
3216 tree var = vect_recog_temp_ssa_var (utype, NULL);
3218 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3219 build_int_cst (utype, -1),
3220 build_int_cst (utype, 0));
3221 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3222 var = vect_recog_temp_ssa_var (utype, NULL);
3223 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3224 gimple_assign_lhs (def_stmt),
3225 shift);
3226 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3227 signmask = vect_recog_temp_ssa_var (itype, NULL);
3228 def_stmt
3229 = gimple_build_assign (signmask, NOP_EXPR, var);
3230 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3232 def_stmt
3233 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3234 PLUS_EXPR, oprnd0, signmask);
3235 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3236 def_stmt
3237 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3238 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3239 fold_build2 (MINUS_EXPR, itype, oprnd1,
3240 build_int_cst (itype, 1)));
3241 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3243 pattern_stmt
3244 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3245 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3246 signmask);
3249 return pattern_stmt;
3252 if (prec > HOST_BITS_PER_WIDE_INT
3253 || integer_zerop (oprnd1))
3254 return NULL;
3256 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3257 return NULL;
3259 if (TYPE_UNSIGNED (itype))
3261 unsigned HOST_WIDE_INT mh, ml;
3262 int pre_shift, post_shift;
3263 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3264 & GET_MODE_MASK (itype_mode));
3265 tree t1, t2, t3, t4;
3267 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3268 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3269 return NULL;
3271 /* Find a suitable multiplier and right shift count
3272 instead of multiplying with D. */
3273 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3275 /* If the suggested multiplier is more than SIZE bits, we can do better
3276 for even divisors, using an initial right shift. */
3277 if (mh != 0 && (d & 1) == 0)
3279 pre_shift = ctz_or_zero (d);
3280 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3281 &ml, &post_shift, &dummy_int);
3282 gcc_assert (!mh);
3284 else
3285 pre_shift = 0;
3287 if (mh != 0)
3289 if (post_shift - 1 >= prec)
3290 return NULL;
3292 /* t1 = oprnd0 h* ml;
3293 t2 = oprnd0 - t1;
3294 t3 = t2 >> 1;
3295 t4 = t1 + t3;
3296 q = t4 >> (post_shift - 1); */
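	  /* A purely illustrative instance: for prec == 32 and d == 7,
	     choose_multiplier gives ml == 0x24924925 and post_shift == 3,
	     so the quotient is

	       t1 = oprnd0 h* 0x24924925;
	       t2 = oprnd0 - t1;
	       t3 = t2 >> 1;
	       t4 = t1 + t3;
	       q = t4 >> 2;  */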
3297 t1 = vect_recog_temp_ssa_var (itype, NULL);
3298 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3299 build_int_cst (itype, ml));
3300 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3302 t2 = vect_recog_temp_ssa_var (itype, NULL);
3303 def_stmt
3304 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3305 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3307 t3 = vect_recog_temp_ssa_var (itype, NULL);
3308 def_stmt
3309 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3310 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3312 t4 = vect_recog_temp_ssa_var (itype, NULL);
3313 def_stmt
3314 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3316 if (post_shift != 1)
3318 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3320 q = vect_recog_temp_ssa_var (itype, NULL);
3321 pattern_stmt
3322 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3323 build_int_cst (itype, post_shift - 1));
3325 else
3327 q = t4;
3328 pattern_stmt = def_stmt;
3331 else
3333 if (pre_shift >= prec || post_shift >= prec)
3334 return NULL;
3336 /* t1 = oprnd0 >> pre_shift;
3337 t2 = t1 h* ml;
3338 q = t2 >> post_shift; */
3339 if (pre_shift)
3341 t1 = vect_recog_temp_ssa_var (itype, NULL);
3342 def_stmt
3343 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3344 build_int_cst (NULL, pre_shift));
3345 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3347 else
3348 t1 = oprnd0;
3350 t2 = vect_recog_temp_ssa_var (itype, NULL);
3351 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3352 build_int_cst (itype, ml));
3354 if (post_shift)
3356 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3358 q = vect_recog_temp_ssa_var (itype, NULL);
3359 def_stmt
3360 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3361 build_int_cst (itype, post_shift));
3363 else
3364 q = t2;
3366 pattern_stmt = def_stmt;
3369 else
3371 unsigned HOST_WIDE_INT ml;
3372 int post_shift;
3373 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3374 unsigned HOST_WIDE_INT abs_d;
3375 bool add = false;
3376 tree t1, t2, t3, t4;
3378 /* Give up for -1. */
3379 if (d == -1)
3380 return NULL;
3382 /* Since d might be INT_MIN, we have to cast to
3383 unsigned HOST_WIDE_INT before negating to avoid
3384 undefined signed overflow. */
3385 abs_d = (d >= 0
3386 ? (unsigned HOST_WIDE_INT) d
3387 : - (unsigned HOST_WIDE_INT) d);
3389 /* n rem d = n rem -d */
3390 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3392 d = abs_d;
3393 oprnd1 = build_int_cst (itype, abs_d);
3395 if (HOST_BITS_PER_WIDE_INT >= prec
3396 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3397 /* This case is not handled correctly below. */
3398 return NULL;
3400 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3401 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3403 add = true;
3404 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3406 if (post_shift >= prec)
3407 return NULL;
3409 /* t1 = oprnd0 h* ml; */
3410 t1 = vect_recog_temp_ssa_var (itype, NULL);
3411 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3412 build_int_cst (itype, ml));
3414 if (add)
3416 /* t2 = t1 + oprnd0; */
3417 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3418 t2 = vect_recog_temp_ssa_var (itype, NULL);
3419 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3421 else
3422 t2 = t1;
3424 if (post_shift)
3426 /* t3 = t2 >> post_shift; */
3427 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3428 t3 = vect_recog_temp_ssa_var (itype, NULL);
3429 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3430 build_int_cst (itype, post_shift));
3432 else
3433 t3 = t2;
3435 wide_int oprnd0_min, oprnd0_max;
3436 int msb = 1;
3437 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3439 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3440 msb = 0;
3441 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3442 msb = -1;
3445 if (msb == 0 && d >= 0)
3447 /* q = t3; */
3448 q = t3;
3449 pattern_stmt = def_stmt;
3451 else
3453 /* t4 = oprnd0 >> (prec - 1);
3454 or if we know from VRP that oprnd0 >= 0
3455 t4 = 0;
3456 or if we know from VRP that oprnd0 < 0
3457 t4 = -1; */
3458 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3459 t4 = vect_recog_temp_ssa_var (itype, NULL);
3460 if (msb != 1)
3461 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3462 build_int_cst (itype, msb));
3463 else
3464 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3465 build_int_cst (itype, prec - 1));
3466 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3468 /* q = t3 - t4; or q = t4 - t3; */
3469 q = vect_recog_temp_ssa_var (itype, NULL);
3470 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3471 d < 0 ? t3 : t4);
3475 if (rhs_code == TRUNC_MOD_EXPR)
3477 tree r, t1;
3479 /* We divided. Now finish by:
3480 t1 = q * oprnd1;
3481 r = oprnd0 - t1; */
3482 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3484 t1 = vect_recog_temp_ssa_var (itype, NULL);
3485 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3486 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3488 r = vect_recog_temp_ssa_var (itype, NULL);
3489 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3492 /* Pattern detected. */
3493 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3495 *type_out = vectype;
3496 return pattern_stmt;
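/* Illustrative sketch, not part of GCC: the scalar effect of the signed
   branch above for prec == 32 and d == 7, written as plain C.  The
   multiplier 0x92492493 and post_shift 2 are the values choose_multiplier
   is expected to produce for this case; since the multiplier has its top
   bit set, the "add" step is used.  Assumes arithmetic right shifts of
   negative values, as the emitted vector code does.  */
static int
vect_divmod_sketch_div7 (int n)
{
  const long long ml = (long long) 0x92492493 - (1LL << 32); /* signed view */
  int t1 = (int) (((long long) n * ml) >> 32);  /* t1 = n h* ml            */
  int t2 = t1 + n;                              /* add, because ml >= 2^31 */
  int t3 = t2 >> 2;                             /* t3 = t2 >> post_shift   */
  int t4 = n >> 31;                             /* t4 = n >> (prec - 1)    */
  return t3 - t4;                               /* q = t3 - t4 == n / 7    */
}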
3499 /* Function vect_recog_mixed_size_cond_pattern
3501 Try to find the following pattern:
3503 type x_t, y_t;
3504 TYPE a_T, b_T, c_T;
3505 loop:
3506 S1 a_T = x_t CMP y_t ? b_T : c_T;
3508 where type 'TYPE' is an integral type which has different size
3509 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3510 than 'type', the constants need to fit into an integer type
3511 with the same width as 'type') or results of conversion from 'type'.
3513 Input:
3515 * STMT_VINFO: The stmt from which the pattern search begins.
3517 Output:
3519 * TYPE_OUT: The type of the output of this pattern.
3521 * Return value: A new stmt that will be used to replace the pattern.
3522 Additionally a def_stmt is added.
3524 a_it = x_t CMP y_t ? b_it : c_it;
3525 a_T = (TYPE) a_it; */
3527 static gimple *
3528 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3529 stmt_vec_info stmt_vinfo, tree *type_out)
3531 gimple *last_stmt = stmt_vinfo->stmt;
3532 tree cond_expr, then_clause, else_clause;
3533 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3534 gimple *pattern_stmt, *def_stmt;
3535 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3536 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3537 bool promotion;
3538 tree comp_scalar_type;
3540 if (!is_gimple_assign (last_stmt)
3541 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3542 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3543 return NULL;
3545 cond_expr = gimple_assign_rhs1 (last_stmt);
3546 then_clause = gimple_assign_rhs2 (last_stmt);
3547 else_clause = gimple_assign_rhs3 (last_stmt);
3549 if (!COMPARISON_CLASS_P (cond_expr))
3550 return NULL;
3552 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3553 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3554 if (comp_vectype == NULL_TREE)
3555 return NULL;
3557 type = gimple_expr_type (last_stmt);
3558 if (types_compatible_p (type, comp_scalar_type)
3559 || ((TREE_CODE (then_clause) != INTEGER_CST
3560 || TREE_CODE (else_clause) != INTEGER_CST)
3561 && !INTEGRAL_TYPE_P (comp_scalar_type))
3562 || !INTEGRAL_TYPE_P (type))
3563 return NULL;
3565 if ((TREE_CODE (then_clause) != INTEGER_CST
3566 && !type_conversion_p (vinfo, then_clause, false,
3567 &orig_type0, &def_stmt0, &promotion))
3568 || (TREE_CODE (else_clause) != INTEGER_CST
3569 && !type_conversion_p (vinfo, else_clause, false,
3570 &orig_type1, &def_stmt1, &promotion)))
3571 return NULL;
3573 if (orig_type0 && orig_type1
3574 && !types_compatible_p (orig_type0, orig_type1))
3575 return NULL;
3577 if (orig_type0)
3579 if (!types_compatible_p (orig_type0, comp_scalar_type))
3580 return NULL;
3581 then_clause = gimple_assign_rhs1 (def_stmt0);
3582 itype = orig_type0;
3585 if (orig_type1)
3587 if (!types_compatible_p (orig_type1, comp_scalar_type))
3588 return NULL;
3589 else_clause = gimple_assign_rhs1 (def_stmt1);
3590 itype = orig_type1;
3594 HOST_WIDE_INT cmp_mode_size
3595 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3597 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3598 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3599 return NULL;
3601 vectype = get_vectype_for_scalar_type (vinfo, type);
3602 if (vectype == NULL_TREE)
3603 return NULL;
3605 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3606 return NULL;
3608 if (itype == NULL_TREE)
3609 itype = build_nonstandard_integer_type (cmp_mode_size,
3610 TYPE_UNSIGNED (type));
3612 if (itype == NULL_TREE
3613 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3614 return NULL;
3616 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3617 if (vecitype == NULL_TREE)
3618 return NULL;
3620 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3621 return NULL;
3623 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3625 if ((TREE_CODE (then_clause) == INTEGER_CST
3626 && !int_fits_type_p (then_clause, itype))
3627 || (TREE_CODE (else_clause) == INTEGER_CST
3628 && !int_fits_type_p (else_clause, itype)))
3629 return NULL;
3632 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3633 COND_EXPR, unshare_expr (cond_expr),
3634 fold_convert (itype, then_clause),
3635 fold_convert (itype, else_clause));
3636 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3637 NOP_EXPR, gimple_assign_lhs (def_stmt));
3639 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3640 *type_out = vectype;
3642 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3644 return pattern_stmt;
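/* Illustrative sketch, not part of GCC: the scalar shape of this pattern
   for 'type' == short and 'TYPE' == int.  The select is performed in the
   narrower 16-bit type and only its result is widened, which is what the
   emitted def_stmt + pattern_stmt pair above expresses.  */
static int
vect_mixed_size_cond_sketch (short x_t, short y_t, short b, short c)
{
  short a_it = x_t < y_t ? b : c;  /* a_it = x_t CMP y_t ? b_it : c_it */
  return (int) a_it;               /* a_T = (TYPE) a_it                */
}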
3648 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3649 true if bool VAR can and should be optimized that way. Assume it shouldn't
3650 in case it's a result of a comparison which can be directly vectorized into
3651 a vector comparison. Fills in STMTS with all stmts visited during the
3652 walk. */
3654 static bool
3655 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3657 tree rhs1;
3658 enum tree_code rhs_code;
3660 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3661 if (!def_stmt_info)
3662 return false;
3664 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3665 if (!def_stmt)
3666 return false;
3668 if (stmts.contains (def_stmt))
3669 return true;
3671 rhs1 = gimple_assign_rhs1 (def_stmt);
3672 rhs_code = gimple_assign_rhs_code (def_stmt);
3673 switch (rhs_code)
3675 case SSA_NAME:
3676 if (! check_bool_pattern (rhs1, vinfo, stmts))
3677 return false;
3678 break;
3680 CASE_CONVERT:
3681 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3682 return false;
3683 if (! check_bool_pattern (rhs1, vinfo, stmts))
3684 return false;
3685 break;
3687 case BIT_NOT_EXPR:
3688 if (! check_bool_pattern (rhs1, vinfo, stmts))
3689 return false;
3690 break;
3692 case BIT_AND_EXPR:
3693 case BIT_IOR_EXPR:
3694 case BIT_XOR_EXPR:
3695 if (! check_bool_pattern (rhs1, vinfo, stmts)
3696 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3697 return false;
3698 break;
3700 default:
3701 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3703 tree vecitype, comp_vectype;
3705 /* If the comparison can throw, then is_gimple_condexpr will be
3706 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3707 if (stmt_could_throw_p (cfun, def_stmt))
3708 return false;
3710 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3711 if (comp_vectype == NULL_TREE)
3712 return false;
3714 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3715 TREE_TYPE (rhs1));
3716 if (mask_type
3717 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3718 return false;
3720 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3722 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3723 tree itype
3724 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3725 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3726 if (vecitype == NULL_TREE)
3727 return false;
3729 else
3730 vecitype = comp_vectype;
3731 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3732 return false;
3734 else
3735 return false;
3736 break;
3739 bool res = stmts.add (def_stmt);
3740 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3741 gcc_assert (!res);
3743 return true;
3747 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3748 stmt (SSA_NAME_DEF_STMT of VAR), adding the cast to STMT_INFO's
3749 pattern sequence. */
3751 static tree
3752 adjust_bool_pattern_cast (vec_info *vinfo,
3753 tree type, tree var, stmt_vec_info stmt_info)
3755 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3756 NOP_EXPR, var);
3757 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3758 get_vectype_for_scalar_type (vinfo, type));
3759 return gimple_assign_lhs (cast_stmt);
3762 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3763 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3764 type, OUT_TYPE is the desired final integer type of the whole pattern.
3765 STMT_INFO is the info of the pattern root and is where pattern stmts should
3766 be associated with. DEFS is a map of pattern defs. */
3768 static void
3769 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3770 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3772 gimple *stmt = SSA_NAME_DEF_STMT (var);
3773 enum tree_code rhs_code, def_rhs_code;
3774 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3775 location_t loc;
3776 gimple *pattern_stmt, *def_stmt;
3777 tree trueval = NULL_TREE;
3779 rhs1 = gimple_assign_rhs1 (stmt);
3780 rhs2 = gimple_assign_rhs2 (stmt);
3781 rhs_code = gimple_assign_rhs_code (stmt);
3782 loc = gimple_location (stmt);
3783 switch (rhs_code)
3785 case SSA_NAME:
3786 CASE_CONVERT:
3787 irhs1 = *defs.get (rhs1);
3788 itype = TREE_TYPE (irhs1);
3789 pattern_stmt
3790 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3791 SSA_NAME, irhs1);
3792 break;
3794 case BIT_NOT_EXPR:
3795 irhs1 = *defs.get (rhs1);
3796 itype = TREE_TYPE (irhs1);
3797 pattern_stmt
3798 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3799 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3800 break;
3802 case BIT_AND_EXPR:
3803 /* Try to optimize x = y & (a < b ? 1 : 0); into
3804 x = (a < b ? y : 0);
3806 E.g. for:
3807 bool a_b, b_b, c_b;
3808 TYPE d_T;
3810 S1 a_b = x1 CMP1 y1;
3811 S2 b_b = x2 CMP2 y2;
3812 S3 c_b = a_b & b_b;
3813 S4 d_T = (TYPE) c_b;
3815 we would normally emit:
3817 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3818 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3819 S3' c_T = a_T & b_T;
3820 S4' d_T = c_T;
3822 but we can save one stmt by using the
3823 result of one of the COND_EXPRs in the other COND_EXPR and leave
3824 BIT_AND_EXPR stmt out:
3826 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3827 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3828 S4' d_T = c_T;
3830 At least when VEC_COND_EXPR is implemented using masks
3831 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3832 computes the comparison mask and ANDs it, in one case with an
3833 all-ones vector, in the other case with a vector register.
3834 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3835 often more expensive. */
3836 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3837 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3838 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3840 irhs1 = *defs.get (rhs1);
3841 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3842 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3843 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3845 rhs_code = def_rhs_code;
3846 rhs1 = def_rhs1;
3847 rhs2 = gimple_assign_rhs2 (def_stmt);
3848 trueval = irhs1;
3849 goto do_compare;
3851 else
3852 irhs2 = *defs.get (rhs2);
3853 goto and_ior_xor;
3855 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3856 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3857 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3859 irhs2 = *defs.get (rhs2);
3860 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3861 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3862 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3864 rhs_code = def_rhs_code;
3865 rhs1 = def_rhs1;
3866 rhs2 = gimple_assign_rhs2 (def_stmt);
3867 trueval = irhs2;
3868 goto do_compare;
3870 else
3871 irhs1 = *defs.get (rhs1);
3872 goto and_ior_xor;
3874 /* FALLTHRU */
3875 case BIT_IOR_EXPR:
3876 case BIT_XOR_EXPR:
3877 irhs1 = *defs.get (rhs1);
3878 irhs2 = *defs.get (rhs2);
3879 and_ior_xor:
3880 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3881 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3883 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3884 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3885 int out_prec = TYPE_PRECISION (out_type);
3886 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3887 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
3888 stmt_info);
3889 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3890 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
3891 stmt_info);
3892 else
3894 irhs1 = adjust_bool_pattern_cast (vinfo,
3895 out_type, irhs1, stmt_info);
3896 irhs2 = adjust_bool_pattern_cast (vinfo,
3897 out_type, irhs2, stmt_info);
3900 itype = TREE_TYPE (irhs1);
3901 pattern_stmt
3902 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3903 rhs_code, irhs1, irhs2);
3904 break;
3906 default:
3907 do_compare:
3908 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3909 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3910 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3911 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3912 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3914 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3915 itype
3916 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3918 else
3919 itype = TREE_TYPE (rhs1);
3920 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3921 if (trueval == NULL_TREE)
3922 trueval = build_int_cst (itype, 1);
3923 else
3924 gcc_checking_assert (useless_type_conversion_p (itype,
3925 TREE_TYPE (trueval)));
3926 pattern_stmt
3927 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3928 COND_EXPR, cond_expr, trueval,
3929 build_int_cst (itype, 0));
3930 break;
3933 gimple_set_location (pattern_stmt, loc);
3934 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
3935 get_vectype_for_scalar_type (vinfo, itype));
3936 defs.put (var, gimple_assign_lhs (pattern_stmt));
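/* Illustrative sketch, not part of GCC: the scalar identity behind the
   BIT_AND_EXPR shortcut above.  For any y, y & (a < b ? 1 : 0) equals
   a < b ? y : 0, so the separate AND statement can be dropped.  */
static int
vect_bit_and_cond_sketch (int a, int b, int y)
{
  int with_and = y & (a < b ? 1 : 0);  /* S3 form: compare, then AND     */
  int folded = a < b ? y : 0;          /* S3' form: AND folded into cond */
  return with_and == folded;           /* always 1                       */
}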
3939 /* Comparison function to qsort a vector of gimple stmts by UID. */
3941 static int
3942 sort_after_uid (const void *p1, const void *p2)
3944 const gimple *stmt1 = *(const gimple * const *)p1;
3945 const gimple *stmt2 = *(const gimple * const *)p2;
3946 return gimple_uid (stmt1) - gimple_uid (stmt2);
3949 /* Create pattern stmts for all stmts participating in the bool pattern
3950 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3951 OUT_TYPE. Return the def of the pattern root. */
3953 static tree
3954 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
3955 tree out_type, stmt_vec_info stmt_info)
3957 /* Gather original stmts in the bool pattern in their order of appearance
3958 in the IL. */
3959 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3960 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3961 i != bool_stmt_set.end (); ++i)
3962 bool_stmts.quick_push (*i);
3963 bool_stmts.qsort (sort_after_uid);
3965 /* Now process them in that order, producing pattern stmts. */
3966 hash_map <tree, tree> defs;
3967 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3968 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
3969 out_type, stmt_info, defs);
3971 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3972 gimple *pattern_stmt
3973 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3974 return gimple_assign_lhs (pattern_stmt);
3977 /* Return the proper type for converting bool VAR into
3978 an integer value or NULL_TREE if no such type exists.
3979 The type is chosen so that the converted value has the
3980 same number of elements as VAR's vector type. */
3982 static tree
3983 integer_type_for_mask (tree var, vec_info *vinfo)
3985 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3986 return NULL_TREE;
3988 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3989 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
3990 return NULL_TREE;
3992 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
3995 /* Function vect_recog_bool_pattern
3997 Try to find pattern like following:
3999 bool a_b, b_b, c_b, d_b, e_b;
4000 TYPE f_T;
4001 loop:
4002 S1 a_b = x1 CMP1 y1;
4003 S2 b_b = x2 CMP2 y2;
4004 S3 c_b = a_b & b_b;
4005 S4 d_b = x3 CMP3 y3;
4006 S5 e_b = c_b | d_b;
4007 S6 f_T = (TYPE) e_b;
4009 where type 'TYPE' is an integral type. Or a similar pattern
4010 ending in
4012 S6 f_Y = e_b ? r_Y : s_Y;
4014 as results from if-conversion of a complex condition.
4016 Input:
4018 * STMT_VINFO: The stmt at the end from which the pattern
4019 search begins, i.e. cast of a bool to
4020 an integer type.
4022 Output:
4024 * TYPE_OUT: The type of the output of this pattern.
4026 * Return value: A new stmt that will be used to replace the pattern.
4028 Assuming the size of TYPE is the same as the size of all comparisons
4029 (otherwise some casts would be added where needed), for the above
4030 sequence we create related pattern stmts:
4031 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4032 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4033 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4034 S5' e_T = c_T | d_T;
4035 S6' f_T = e_T;
4037 Instead of the above S3' we could emit:
4038 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4039 S3' c_T = a_T | b_T;
4040 but the above is more efficient. */
4042 static gimple *
4043 vect_recog_bool_pattern (vec_info *vinfo,
4044 stmt_vec_info stmt_vinfo, tree *type_out)
4046 gimple *last_stmt = stmt_vinfo->stmt;
4047 enum tree_code rhs_code;
4048 tree var, lhs, rhs, vectype;
4049 gimple *pattern_stmt;
4051 if (!is_gimple_assign (last_stmt))
4052 return NULL;
4054 var = gimple_assign_rhs1 (last_stmt);
4055 lhs = gimple_assign_lhs (last_stmt);
4056 rhs_code = gimple_assign_rhs_code (last_stmt);
4058 if (rhs_code == VIEW_CONVERT_EXPR)
4059 var = TREE_OPERAND (var, 0);
4061 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4062 return NULL;
4064 hash_set<gimple *> bool_stmts;
4066 if (CONVERT_EXPR_CODE_P (rhs_code)
4067 || rhs_code == VIEW_CONVERT_EXPR)
4069 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4070 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
4071 return NULL;
4072 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4073 if (vectype == NULL_TREE)
4074 return NULL;
4076 if (check_bool_pattern (var, vinfo, bool_stmts))
4078 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4079 TREE_TYPE (lhs), stmt_vinfo);
4080 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4081 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4082 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4083 else
4084 pattern_stmt
4085 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4087 else
4089 tree type = integer_type_for_mask (var, vinfo);
4090 tree cst0, cst1, tmp;
4092 if (!type)
4093 return NULL;
4095 /* We may directly use the cond with a narrowed type to avoid
4096 multiple cond exprs followed by result packing, and perform a
4097 single cond with a packed mask instead. In case of widening it
4098 is better to do the cond first and then extract the
4099 results. */
4100 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4101 type = TREE_TYPE (lhs);
4103 cst0 = build_int_cst (type, 0);
4104 cst1 = build_int_cst (type, 1);
4105 tmp = vect_recog_temp_ssa_var (type, NULL);
4106 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4108 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4110 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4111 append_pattern_def_seq (vinfo, stmt_vinfo,
4112 pattern_stmt, new_vectype);
4114 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4115 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4119 *type_out = vectype;
4120 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4122 return pattern_stmt;
4124 else if (rhs_code == COND_EXPR
4125 && TREE_CODE (var) == SSA_NAME)
4127 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4128 if (vectype == NULL_TREE)
4129 return NULL;
4131 /* Build a scalar type for the boolean result that when
4132 vectorized matches the vector type of the result in
4133 size and number of elements. */
4134 unsigned prec
4135 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4136 TYPE_VECTOR_SUBPARTS (vectype));
4138 tree type
4139 = build_nonstandard_integer_type (prec,
4140 TYPE_UNSIGNED (TREE_TYPE (var)));
4141 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4142 return NULL;
4144 if (!check_bool_pattern (var, vinfo, bool_stmts))
4145 return NULL;
4147 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4149 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4150 pattern_stmt
4151 = gimple_build_assign (lhs, COND_EXPR,
4152 build2 (NE_EXPR, boolean_type_node,
4153 rhs, build_int_cst (type, 0)),
4154 gimple_assign_rhs2 (last_stmt),
4155 gimple_assign_rhs3 (last_stmt));
4156 *type_out = vectype;
4157 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4159 return pattern_stmt;
4161 else if (rhs_code == SSA_NAME
4162 && STMT_VINFO_DATA_REF (stmt_vinfo))
4164 stmt_vec_info pattern_stmt_info;
4165 tree nunits_vectype;
4166 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4167 &nunits_vectype)
4168 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4169 return NULL;
4171 if (check_bool_pattern (var, vinfo, bool_stmts))
4172 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4173 TREE_TYPE (vectype), stmt_vinfo);
4174 else
4176 tree type = integer_type_for_mask (var, vinfo);
4177 tree cst0, cst1, new_vectype;
4179 if (!type)
4180 return NULL;
4182 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4183 type = TREE_TYPE (vectype);
4185 cst0 = build_int_cst (type, 0);
4186 cst1 = build_int_cst (type, 1);
4187 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4189 rhs = vect_recog_temp_ssa_var (type, NULL);
4190 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4191 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4194 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4195 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4197 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4198 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4199 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4200 rhs = rhs2;
4202 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4203 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4204 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4205 *type_out = vectype;
4206 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4208 return pattern_stmt;
4210 else
4211 return NULL;
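/* Illustrative sketch, not part of GCC: the scalar rewrite described in
   the comment above vect_recog_bool_pattern, with TYPE == int, all
   comparisons done on ints (so no extra casts are needed) and CMP1/2/3
   taken to be '<' purely for illustration.  */
static int
vect_bool_pattern_sketch (int x1, int y1, int x2, int y2, int x3, int y3)
{
  int a_T = x1 < y1 ? 1 : 0;    /* S1' */
  int c_T = x2 < y2 ? a_T : 0;  /* S3': reuses a_T, the AND is folded */
  int d_T = x3 < y3 ? 1 : 0;    /* S4' */
  int e_T = c_T | d_T;          /* S5' */
  return e_T;                   /* S6'  f_T = e_T */
}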
4215 /* A helper for vect_recog_mask_conversion_pattern. Build
4216 conversion of MASK to a type suitable for masking VECTYPE.
4217 The built statement gets the required vectype and is appended to
4218 the pattern sequence of STMT_VINFO.
4220 Return converted mask. */
4222 static tree
4223 build_mask_conversion (vec_info *vinfo,
4224 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4226 gimple *stmt;
4227 tree masktype, tmp;
4229 masktype = truth_type_for (vectype);
4230 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4231 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4232 append_pattern_def_seq (vinfo, stmt_vinfo,
4233 stmt, masktype, TREE_TYPE (vectype));
4235 return tmp;
4239 /* Function vect_recog_mask_conversion_pattern
4241 Try to find statements which require boolean type
4242 conversion. Additional conversion statements are
4243 added to handle such cases. For example:
4245 bool m_1, m_2, m_3;
4246 int i_4, i_5;
4247 double d_6, d_7;
4248 char c_1, c_2, c_3;
4250 S1 m_1 = i_4 > i_5;
4251 S2 m_2 = d_6 < d_7;
4252 S3 m_3 = m_1 & m_2;
4253 S4 c_1 = m_3 ? c_2 : c_3;
4255 Will be transformed into:
4257 S1 m_1 = i_4 > i_5;
4258 S2 m_2 = d_6 < d_7;
4259 S3'' m_2' = (_Bool[bitsize=32])m_2
4260 S3' m_3' = m_1 & m_2';
4261 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4262 S4' c_1' = m_3'' ? c_2 : c_3; */
4264 static gimple *
4265 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4266 stmt_vec_info stmt_vinfo, tree *type_out)
4268 gimple *last_stmt = stmt_vinfo->stmt;
4269 enum tree_code rhs_code;
4270 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4271 tree vectype1, vectype2;
4272 stmt_vec_info pattern_stmt_info;
4273 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4274 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4276 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4277 if (is_gimple_call (last_stmt)
4278 && gimple_call_internal_p (last_stmt))
4280 gcall *pattern_stmt;
4282 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4283 int mask_argno = internal_fn_mask_index (ifn);
4284 if (mask_argno < 0)
4285 return NULL;
4287 bool store_p = internal_store_fn_p (ifn);
4288 if (store_p)
4290 int rhs_index = internal_fn_stored_value_index (ifn);
4291 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4292 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4294 else
4296 lhs = gimple_call_lhs (last_stmt);
4297 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4300 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4301 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4302 if (!mask_arg_type)
4303 return NULL;
4304 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4306 if (!vectype1 || !vectype2
4307 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4308 TYPE_VECTOR_SUBPARTS (vectype2)))
4309 return NULL;
4311 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4313 auto_vec<tree, 8> args;
4314 unsigned int nargs = gimple_call_num_args (last_stmt);
4315 args.safe_grow (nargs, true);
4316 for (unsigned int i = 0; i < nargs; ++i)
4317 args[i] = ((int) i == mask_argno
4318 ? tmp
4319 : gimple_call_arg (last_stmt, i));
4320 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4322 if (!store_p)
4324 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4325 gimple_call_set_lhs (pattern_stmt, lhs);
4327 gimple_call_set_nothrow (pattern_stmt, true);
4329 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4330 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4331 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4333 *type_out = vectype1;
4334 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4336 return pattern_stmt;
4339 if (!is_gimple_assign (last_stmt))
4340 return NULL;
4342 gimple *pattern_stmt;
4343 lhs = gimple_assign_lhs (last_stmt);
4344 rhs1 = gimple_assign_rhs1 (last_stmt);
4345 rhs_code = gimple_assign_rhs_code (last_stmt);
4347 /* Check for cond expression requiring mask conversion. */
4348 if (rhs_code == COND_EXPR)
4350 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4352 if (TREE_CODE (rhs1) == SSA_NAME)
4354 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4355 if (!rhs1_type)
4356 return NULL;
4358 else if (COMPARISON_CLASS_P (rhs1))
4360 /* Check whether we're comparing scalar booleans and (if so)
4361 whether a better mask type exists than the mask associated
4362 with boolean-sized elements. This avoids unnecessary packs
4363 and unpacks if the booleans are set from comparisons of
4364 wider types. E.g. in:
4366 int x1, x2, x3, x4, y1, y2;
4368 bool b1 = (x1 == x2);
4369 bool b2 = (x3 == x4);
4370 ... = b1 == b2 ? y1 : y2;
4372 it is better for b1 and b2 to use the mask type associated
4373 with int elements rather than bool (byte) elements. */
4374 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4375 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4376 if (!rhs1_op0 || !rhs1_op1)
4377 return NULL;
4378 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4379 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4381 if (!rhs1_op0_type)
4382 rhs1_type = TREE_TYPE (rhs1_op0);
4383 else if (!rhs1_op1_type)
4384 rhs1_type = TREE_TYPE (rhs1_op1);
4385 else if (TYPE_PRECISION (rhs1_op0_type)
4386 != TYPE_PRECISION (rhs1_op1_type))
4388 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4389 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4390 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4391 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4392 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4394 if (abs (tmp0) > abs (tmp1))
4395 rhs1_type = rhs1_op1_type;
4396 else
4397 rhs1_type = rhs1_op0_type;
4399 else
4400 rhs1_type = build_nonstandard_integer_type
4401 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4403 else
4404 rhs1_type = rhs1_op0_type;
4406 else
4407 return NULL;
4409 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4411 if (!vectype1 || !vectype2)
4412 return NULL;
4414 /* Continue if a conversion is needed. Also continue if we have
4415 a comparison whose vector type would normally be different from
4416 VECTYPE2 when considered in isolation. In that case we'll
4417 replace the comparison with an SSA name (so that we can record
4418 its vector type) and behave as though the comparison was an SSA
4419 name from the outset. */
4420 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4421 TYPE_VECTOR_SUBPARTS (vectype2))
4422 && !rhs1_op0_type
4423 && !rhs1_op1_type)
4424 return NULL;
4426 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4427 in place; we can handle it in vectorizable_condition. This avoids
4428 unnecessary promotion stmts and increased vectorization factor. */
4429 if (COMPARISON_CLASS_P (rhs1)
4430 && INTEGRAL_TYPE_P (rhs1_type)
4431 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4432 TYPE_VECTOR_SUBPARTS (vectype2)))
4434 enum vect_def_type dt;
4435 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4436 && dt == vect_external_def
4437 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4438 && (dt == vect_external_def
4439 || dt == vect_constant_def))
4441 tree wide_scalar_type = build_nonstandard_integer_type
4442 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4443 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4444 wide_scalar_type);
4445 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4446 return NULL;
4450 /* If rhs1 is a comparison we need to move it into a
4451 separate statement. */
4452 if (TREE_CODE (rhs1) != SSA_NAME)
4454 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4455 if (rhs1_op0_type
4456 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4457 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4458 vectype2, stmt_vinfo);
4459 if (rhs1_op1_type
4460 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4461 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4462 vectype2, stmt_vinfo);
4463 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4464 rhs1_op0, rhs1_op1);
4465 rhs1 = tmp;
4466 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4467 rhs1_type);
4470 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4471 TYPE_VECTOR_SUBPARTS (vectype2)))
4472 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4473 else
4474 tmp = rhs1;
4476 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4477 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4478 gimple_assign_rhs2 (last_stmt),
4479 gimple_assign_rhs3 (last_stmt));
4481 *type_out = vectype1;
4482 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4484 return pattern_stmt;
4487 /* Now check for binary boolean operations requiring conversion for
4488 one of operands. */
4489 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4490 return NULL;
4492 if (rhs_code != BIT_IOR_EXPR
4493 && rhs_code != BIT_XOR_EXPR
4494 && rhs_code != BIT_AND_EXPR
4495 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4496 return NULL;
4498 rhs2 = gimple_assign_rhs2 (last_stmt);
4500 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4501 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4503 if (!rhs1_type || !rhs2_type
4504 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4505 return NULL;
4507 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4509 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4510 if (!vectype1)
4511 return NULL;
4512 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4514 else
4516 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4517 if (!vectype1)
4518 return NULL;
4519 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4522 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4523 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4525 *type_out = vectype1;
4526 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4528 return pattern_stmt;
4531 /* STMT_INFO is a load or store. If the load or store is conditional, return
4532 the boolean condition under which it occurs, otherwise return null. */
4534 static tree
4535 vect_get_load_store_mask (stmt_vec_info stmt_info)
4537 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4539 gcc_assert (gimple_assign_single_p (def_assign));
4540 return NULL_TREE;
4543 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4545 internal_fn ifn = gimple_call_internal_fn (def_call);
4546 int mask_index = internal_fn_mask_index (ifn);
4547 return gimple_call_arg (def_call, mask_index);
4550 gcc_unreachable ();
4553 /* Return MASK if MASK is suitable for masking an operation on vectors
4554 of type VECTYPE, otherwise convert it into such a form and return
4555 the result. Associate any conversion statements with STMT_INFO's
4556 pattern. */
4558 static tree
4559 vect_convert_mask_for_vectype (tree mask, tree vectype,
4560 stmt_vec_info stmt_info, vec_info *vinfo)
4562 tree mask_type = integer_type_for_mask (mask, vinfo);
4563 if (mask_type)
4565 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4566 if (mask_vectype
4567 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4568 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4569 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4571 return mask;
4574 /* Return the equivalent of:
4576 fold_convert (TYPE, VALUE)
4578 with the expectation that the operation will be vectorized.
4579 If new statements are needed, add them as pattern statements
4580 to STMT_INFO. */
4582 static tree
4583 vect_add_conversion_to_pattern (vec_info *vinfo,
4584 tree type, tree value, stmt_vec_info stmt_info)
4586 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4587 return value;
4589 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4590 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4591 append_pattern_def_seq (vinfo, stmt_info, conversion,
4592 get_vectype_for_scalar_type (vinfo, type));
4593 return new_value;
4596 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4597 internal function. Return the final statement on success and set
4598 *TYPE_OUT to the vector type being loaded or stored.
4600 This function only handles gathers and scatters that were recognized
4601 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4603 static gimple *
4604 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4605 stmt_vec_info stmt_info, tree *type_out)
4607 /* Currently we only support this for loop vectorization. */
4608 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4609 if (!loop_vinfo)
4610 return NULL;
4612 /* Make sure that we're looking at a gather load or scatter store. */
4613 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4614 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4615 return NULL;
4617 /* Get the boolean that controls whether the load or store happens.
4618 This is null if the operation is unconditional. */
4619 tree mask = vect_get_load_store_mask (stmt_info);
4621 /* Make sure that the target supports an appropriate internal
4622 function for the gather/scatter operation. */
4623 gather_scatter_info gs_info;
4624 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4625 || gs_info.decl)
4626 return NULL;
4628 /* Convert the mask to the right form. */
4629 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4630 gs_info.element_type);
4631 if (mask)
4632 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4633 loop_vinfo);
4635 /* Get the invariant base and non-invariant offset, converting the
4636 latter to the same width as the vector elements. */
4637 tree base = gs_info.base;
4638 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4639 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4640 gs_info.offset, stmt_info);
4642 /* Build the new pattern statement. */
4643 tree scale = size_int (gs_info.scale);
4644 gcall *pattern_stmt;
4645 if (DR_IS_READ (dr))
4647 tree zero = build_zero_cst (gs_info.element_type);
4648 if (mask != NULL)
4649 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4650 offset, scale, zero, mask);
4651 else
4652 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4653 offset, scale, zero);
4654 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4655 gimple_call_set_lhs (pattern_stmt, load_lhs);
4657 else
4659 tree rhs = vect_get_store_rhs (stmt_info);
4660 if (mask != NULL)
4661 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4662 base, offset, scale, rhs,
4663 mask);
4664 else
4665 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4666 base, offset, scale, rhs);
4668 gimple_call_set_nothrow (pattern_stmt, true);
4670 /* Copy across relevant vectorization info and associate DR with the
4671 new pattern statement instead of the original statement. */
4672 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4673 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4675 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4676 *type_out = vectype;
4677 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4679 return pattern_stmt;
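/* Illustrative sketch, not part of GCC: the kind of conditional indexed
   load that STMT_VINFO_GATHER_SCATTER_P marks and that this pattern
   rewrites into an internal gather-load call with a converted mask.
   All names here are hypothetical.  */
static void
vect_gather_sketch (double *dst, const double *src,
                    const int *idx, const int *cond, int n)
{
  for (int i = 0; i < n; i++)
    if (cond[i])               /* becomes the gather's mask        */
      dst[i] = src[idx[i]];    /* base src, per-lane offset idx[i] */
}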
4682 /* Return true if TYPE is a non-boolean integer type. These are the types
4683 that we want to consider for narrowing. */
4685 static bool
4686 vect_narrowable_type_p (tree type)
4688 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4691 /* Return true if the operation given by CODE can be truncated to N bits
4692 when only N bits of the output are needed. This is only true if bit N+1
4693 of the inputs has no effect on the low N bits of the result. */
4695 static bool
4696 vect_truncatable_operation_p (tree_code code)
4698 switch (code)
4700 case PLUS_EXPR:
4701 case MINUS_EXPR:
4702 case MULT_EXPR:
4703 case BIT_AND_EXPR:
4704 case BIT_IOR_EXPR:
4705 case BIT_XOR_EXPR:
4706 case COND_EXPR:
4707 return true;
4709 default:
4710 return false;
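/* Illustrative sketch, not part of GCC: why PLUS_EXPR is truncatable.
   The low 8 bits of a sum depend only on the low 8 bits of its inputs,
   so the addition could be done in an 8-bit type when only 8 output
   bits are needed.  A right shift is not like this: input bit 8 feeds
   output bit 7, which is why RSHIFT_EXPR is not in the list above.  */
static unsigned char
vect_truncatable_add_sketch (unsigned int a, unsigned int b)
{
  unsigned char a8 = (unsigned char) a;
  unsigned char b8 = (unsigned char) b;
  /* Equal to (unsigned char) (a + b) for all a and b.  */
  return (unsigned char) (a8 + b8);
}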
4714 /* Record that STMT_INFO could be changed from operating on TYPE to
4715 operating on a type with the precision and sign given by PRECISION
4716 and SIGN respectively. PRECISION is an arbitrary bit precision;
4717 it might not be a whole number of bytes. */
4719 static void
4720 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4721 unsigned int precision, signop sign)
4723 /* Round the precision up to a whole number of bytes. */
4724 precision = vect_element_precision (precision);
4725 if (precision < TYPE_PRECISION (type)
4726 && (!stmt_info->operation_precision
4727 || stmt_info->operation_precision > precision))
4729 stmt_info->operation_precision = precision;
4730 stmt_info->operation_sign = sign;
4734 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4735 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4736 is an arbitrary bit precision; it might not be a whole number of bytes. */
4738 static void
4739 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4740 unsigned int min_input_precision)
4742 /* This operation in isolation only requires the inputs to have
4743 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4744 that MIN_INPUT_PRECISION is a natural precision for the chain
4745 as a whole. E.g. consider something like:
4747 unsigned short *x, *y;
4748 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4750 The right shift can be done on unsigned chars, and only requires the
4751 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4752 approach would mean turning a natural chain of single-vector unsigned
4753 short operations into one that truncates "*x" and then extends
4754 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4755 operation and one vector for each unsigned char operation.
4756 This would be a significant pessimization.
4758 Instead only propagate the maximum of this precision and the precision
4759 required by the users of the result. This means that we don't pessimize
4760 the case above but continue to optimize things like:
4762 unsigned char *y;
4763 unsigned short *x;
4764 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4766 Here we would truncate two vectors of *x to a single vector of
4767 unsigned chars and use single-vector unsigned char operations for
4768 everything else, rather than doing two unsigned short copies of
4769 "(*x & 0xf0) >> 4" and then truncating the result. */
4770 min_input_precision = MAX (min_input_precision,
4771 stmt_info->min_output_precision);
4773 if (min_input_precision < TYPE_PRECISION (type)
4774 && (!stmt_info->min_input_precision
4775 || stmt_info->min_input_precision > min_input_precision))
4776 stmt_info->min_input_precision = min_input_precision;
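/* Illustrative sketch, not part of GCC: the second example above, where
   propagating the users' precision lets the whole chain stay in
   unsigned char.  *x is truncated once and every later operation is a
   single-vector unsigned char operation.  */
static void
vect_min_input_precision_sketch (unsigned char *y, unsigned short *x)
{
  unsigned char xl = (unsigned char) *x;  /* one truncation of *x */
  *y = (unsigned char) (((xl & 0xf0) >> 4) | (*y << 4));
}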
4779 /* Subroutine of vect_determine_min_output_precision. Return true if
4780 we can calculate a reduced number of output bits for STMT_INFO,
4781 whose result is LHS. */
4783 static bool
4784 vect_determine_min_output_precision_1 (vec_info *vinfo,
4785 stmt_vec_info stmt_info, tree lhs)
4787 /* Take the maximum precision required by users of the result. */
4788 unsigned int precision = 0;
4789 imm_use_iterator iter;
4790 use_operand_p use;
4791 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4793 gimple *use_stmt = USE_STMT (use);
4794 if (is_gimple_debug (use_stmt))
4795 continue;
4796 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4797 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4798 return false;
4799 /* The input precision recorded for COND_EXPRs applies only to the
4800 "then" and "else" values. */
4801 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4802 if (assign
4803 && gimple_assign_rhs_code (assign) == COND_EXPR
4804 && use->use != gimple_assign_rhs2_ptr (assign)
4805 && use->use != gimple_assign_rhs3_ptr (assign))
4806 return false;
4807 precision = MAX (precision, use_stmt_info->min_input_precision);
4810 if (dump_enabled_p ())
4811 dump_printf_loc (MSG_NOTE, vect_location,
4812 "only the low %d bits of %T are significant\n",
4813 precision, lhs);
4814 stmt_info->min_output_precision = precision;
4815 return true;
4818 /* Calculate min_output_precision for STMT_INFO. */
4820 static void
4821 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4823 /* We're only interested in statements with a narrowable result. */
4824 tree lhs = gimple_get_lhs (stmt_info->stmt);
4825 if (!lhs
4826 || TREE_CODE (lhs) != SSA_NAME
4827 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4828 return;
4830 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
4831 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4834 /* Use range information to decide whether STMT (described by STMT_INFO)
4835 could be done in a narrower type. This is effectively a forward
4836 propagation, since it uses context-independent information that applies
4837 to all users of an SSA name. */
4839 static void
4840 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4842 tree lhs = gimple_assign_lhs (stmt);
4843 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4844 return;
4846 tree type = TREE_TYPE (lhs);
4847 if (!vect_narrowable_type_p (type))
4848 return;
4850 /* First see whether we have any useful range information for the result. */
4851 unsigned int precision = TYPE_PRECISION (type);
4852 signop sign = TYPE_SIGN (type);
4853 wide_int min_value, max_value;
4854 if (!vect_get_range_info (lhs, &min_value, &max_value))
4855 return;
4857 tree_code code = gimple_assign_rhs_code (stmt);
4858 unsigned int nops = gimple_num_ops (stmt);
4860 if (!vect_truncatable_operation_p (code))
4861 /* Check that all relevant input operands are compatible, and update
4862 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4863 for (unsigned int i = 1; i < nops; ++i)
4865 tree op = gimple_op (stmt, i);
4866 if (TREE_CODE (op) == INTEGER_CST)
4868 /* Don't require the integer to have RHS_TYPE (which it might
4869 not for things like shift amounts, etc.), but do require it
4870 to fit the type. */
4871 if (!int_fits_type_p (op, type))
4872 return;
4874 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4875 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4877 else if (TREE_CODE (op) == SSA_NAME)
4879 /* Ignore codes that don't take uniform arguments. */
4880 if (!types_compatible_p (TREE_TYPE (op), type))
4881 return;
4883 wide_int op_min_value, op_max_value;
4884 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4885 return;
4887 min_value = wi::min (min_value, op_min_value, sign);
4888 max_value = wi::max (max_value, op_max_value, sign);
4890 else
4891 return;
4894 /* Try to switch signed types for unsigned types if we can.
4895 This is better for two reasons. First, unsigned ops tend
4896 to be cheaper than signed ops. Second, it means that we can
4897 handle things like:
4899 signed char c;
4900 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4904 signed char c;
4905 unsigned short res_1 = (unsigned short) c & 0xff00;
4906 int res = (int) res_1;
4908 where the intermediate result res_1 has unsigned rather than
4909 signed type. */
4910 if (sign == SIGNED && !wi::neg_p (min_value))
4911 sign = UNSIGNED;
4913 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4914 unsigned int precision1 = wi::min_precision (min_value, sign);
4915 unsigned int precision2 = wi::min_precision (max_value, sign);
4916 unsigned int value_precision = MAX (precision1, precision2);
4917 if (value_precision >= precision)
4918 return;
4920 if (dump_enabled_p ())
4921 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4922 " without loss of precision: %G",
4923 sign == SIGNED ? "signed" : "unsigned",
4924 value_precision, stmt);
4926 vect_set_operation_type (stmt_info, type, value_precision, sign);
4927 vect_set_min_input_precision (stmt_info, type, value_precision);
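/* Illustrative sketch, not part of GCC: the narrowing decided above for
   the "(int) c & 0xff00" example.  The result's range is [0, 0xff00],
   so the AND fits in 16 bits and can use an unsigned type even though
   c is signed.  */
static int
vect_range_narrowing_sketch (signed char c)
{
  unsigned short res_1 = (unsigned short) c & 0xff00;  /* narrowed op  */
  return (int) res_1;                                  /* widen result */
}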
4930 /* Use information about the users of STMT's result to decide whether
4931 STMT (described by STMT_INFO) could be done in a narrower type.
4932 This is effectively a backward propagation. */
4934 static void
4935 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4937 tree_code code = gimple_assign_rhs_code (stmt);
4938 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4939 tree type = TREE_TYPE (gimple_op (stmt, opno));
4940 if (!vect_narrowable_type_p (type))
4941 return;
4943 unsigned int precision = TYPE_PRECISION (type);
4944 unsigned int operation_precision, min_input_precision;
4945 switch (code)
4947 CASE_CONVERT:
4948 /* Only the bits that contribute to the output matter. Don't change
4949 the precision of the operation itself. */
4950 operation_precision = precision;
4951 min_input_precision = stmt_info->min_output_precision;
4952 break;
4954 case LSHIFT_EXPR:
4955 case RSHIFT_EXPR:
4957 tree shift = gimple_assign_rhs2 (stmt);
4958 if (TREE_CODE (shift) != INTEGER_CST
4959 || !wi::ltu_p (wi::to_widest (shift), precision))
4960 return;
4961 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4962 if (code == LSHIFT_EXPR)
4964 /* We need CONST_SHIFT fewer bits of the input. */
4965 operation_precision = stmt_info->min_output_precision;
4966 min_input_precision = (MAX (operation_precision, const_shift)
4967 - const_shift);
4969 else
4971 /* We need CONST_SHIFT extra bits to do the operation. */
4972 operation_precision = (stmt_info->min_output_precision
4973 + const_shift);
4974 min_input_precision = operation_precision;
4976 break;
4979 default:
4980 if (vect_truncatable_operation_p (code))
4982 /* Input bit N has no effect on output bits N-1 and lower. */
4983 operation_precision = stmt_info->min_output_precision;
4984 min_input_precision = operation_precision;
4985 break;
4987 return;
4990 if (operation_precision < precision)
4992 if (dump_enabled_p ())
4993 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4994 " without affecting users: %G",
4995 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4996 operation_precision, stmt);
4997 vect_set_operation_type (stmt_info, type, operation_precision,
4998 TYPE_SIGN (type));
5000 vect_set_min_input_precision (stmt_info, type, min_input_precision);
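/* Illustrative sketch, not part of GCC: the LSHIFT_EXPR case above with
   an 8-bit output requirement and const_shift == 3.  Only the low
   8 - 3 = 5 bits of the input reach the low 8 bits of the result, so
   min_input_precision can drop to 5.  */
static unsigned char
vect_lshift_precision_sketch (unsigned int x)
{
  /* Equal to (unsigned char) (x << 3) for all x.  */
  return (unsigned char) ((x & 0x1f) << 3);
}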
5003 /* Return true if the statement described by STMT_INFO sets a boolean
5004 SSA_NAME and if we know how to vectorize this kind of statement using
5005 vector mask types. */
5007 static bool
5008 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5010 tree lhs = gimple_get_lhs (stmt_info->stmt);
5011 if (!lhs
5012 || TREE_CODE (lhs) != SSA_NAME
5013 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5014 return false;
5016 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5018 tree_code rhs_code = gimple_assign_rhs_code (assign);
5019 switch (rhs_code)
5021 CASE_CONVERT:
5022 case SSA_NAME:
5023 case BIT_NOT_EXPR:
5024 case BIT_IOR_EXPR:
5025 case BIT_XOR_EXPR:
5026 case BIT_AND_EXPR:
5027 return true;
5029 default:
5030 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5033 else if (is_a <gphi *> (stmt_info->stmt))
5034 return true;
5035 return false;
5038 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5039 a vector mask type instead of a normal vector type. Record the
5040 result in STMT_INFO->mask_precision. */
5042 static void
5043 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5045 if (!possible_vector_mask_operation_p (stmt_info))
5046 return;
5048 /* If at least one boolean input uses a vector mask type,
5049 pick the mask type with the narrowest elements.
5051 ??? This is the traditional behavior. It should always produce
5052 the smallest number of operations, but isn't necessarily the
5053 optimal choice. For example, if we have:
5055 a = b & c
5057 where:
5059 - the user of a wants it to have a mask type for 16-bit elements (M16)
5060 - b also uses M16
5061 - c uses a mask type for 8-bit elements (M8)
5063 then picking M8 gives:
5065 - 1 M16->M8 pack for b
5066 - 1 M8 AND for a
5067 - 2 M8->M16 unpacks for the user of a
5069 whereas picking M16 would have given:
5071 - 2 M8->M16 unpacks for c
5072 - 2 M16 ANDs for a
5074 The number of operations is equal, but M16 would have given
5075 a shorter dependency chain and allowed more ILP. */
5076 unsigned int precision = ~0U;
5077 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5079 unsigned int nops = gimple_num_ops (assign);
5080 for (unsigned int i = 1; i < nops; ++i)
5082 tree rhs = gimple_op (assign, i);
5083 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5084 continue;
5086 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5087 if (!def_stmt_info)
5088 /* Don't let external or constant operands influence the choice.
5089 We can convert them to whichever vector type we pick. */
5090 continue;
5092 if (def_stmt_info->mask_precision)
5094 if (precision > def_stmt_info->mask_precision)
5095 precision = def_stmt_info->mask_precision;
5099 /* If the statement compares two values that shouldn't use vector masks,
5100 try comparing the values as normal scalars instead. */
5101 tree_code rhs_code = gimple_assign_rhs_code (assign);
5102 if (precision == ~0U
5103 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5105 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5106 scalar_mode mode;
5107 tree vectype, mask_type;
5108 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5109 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5110 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5111 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5112 precision = GET_MODE_BITSIZE (mode);
5115 else
5117 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5118 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5120 tree rhs = gimple_phi_arg_def (phi, i);
5122 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5123 if (!def_stmt_info)
5124 /* Don't let external or constant operands influence the choice.
5125 We can convert them to whichever vector type we pick. */
5126 continue;
5128 if (def_stmt_info->mask_precision)
5130 if (precision > def_stmt_info->mask_precision)
5131 precision = def_stmt_info->mask_precision;
5136 if (dump_enabled_p ())
5138 if (precision == ~0U)
5139 dump_printf_loc (MSG_NOTE, vect_location,
5140 "using normal nonmask vectors for %G",
5141 stmt_info->stmt);
5142 else
5143 dump_printf_loc (MSG_NOTE, vect_location,
5144 "using boolean precision %d for %G",
5145 precision, stmt_info->stmt);
5148 stmt_info->mask_precision = precision;
5151 /* Handle vect_determine_precisions for STMT_INFO, given that we
5152 have already done so for the users of its result. */
5154 void
5155 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5157 vect_determine_min_output_precision (vinfo, stmt_info);
5158 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5160 vect_determine_precisions_from_range (stmt_info, stmt);
5161 vect_determine_precisions_from_users (stmt_info, stmt);
5165 /* Walk backwards through the vectorizable region to determine the
5166 values of these fields:
5168 - min_output_precision
5169 - min_input_precision
5170 - operation_precision
5171 - operation_sign. */
5173 void
5174 vect_determine_precisions (vec_info *vinfo)
5176 DUMP_VECT_SCOPE ("vect_determine_precisions");
5178 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5180 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5181 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5182 unsigned int nbbs = loop->num_nodes;
5184 for (unsigned int i = 0; i < nbbs; i++)
5186 basic_block bb = bbs[i];
5187 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5188 if (!is_gimple_debug (gsi_stmt (si)))
5189 vect_determine_mask_precision
5190 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5192 for (unsigned int i = 0; i < nbbs; i++)
5194 basic_block bb = bbs[nbbs - i - 1];
5195 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5196 !gsi_end_p (si); gsi_prev (&si))
5197 if (!is_gimple_debug (gsi_stmt (si)))
5198 vect_determine_stmt_precisions
5199 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5202 else
5204 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5205 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5207 basic_block bb = bb_vinfo->bbs[i];
5208 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5210 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5211 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5212 vect_determine_mask_precision (vinfo, stmt_info);
5214 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5216 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5217 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5218 vect_determine_mask_precision (vinfo, stmt_info);
5221 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5223 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5224 !gsi_end_p (gsi); gsi_prev (&gsi))
5226 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5227 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5228 vect_determine_stmt_precisions (vinfo, stmt_info);
5230 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5231 !gsi_end_p (gsi); gsi_next (&gsi))
5233 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5234 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5235 vect_determine_stmt_precisions (vinfo, stmt_info);
5241 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5243 struct vect_recog_func
5245 vect_recog_func_ptr fn;
5246 const char *name;
5249 /* Note that ordering matters - the first pattern matching on a stmt is
5250 taken, which means usually the more complex one needs to precede the
5251 less complex ones (widen_sum only after dot_prod or sad for example). */
5252 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5253 { vect_recog_over_widening_pattern, "over_widening" },
5254 /* Must come after over_widening, which narrows the shift as much as
5255 possible beforehand. */
5256 { vect_recog_average_pattern, "average" },
5257 { vect_recog_mulhs_pattern, "mult_high" },
5258 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5259 { vect_recog_widen_mult_pattern, "widen_mult" },
5260 { vect_recog_dot_prod_pattern, "dot_prod" },
5261 { vect_recog_sad_pattern, "sad" },
5262 { vect_recog_widen_sum_pattern, "widen_sum" },
5263 { vect_recog_pow_pattern, "pow" },
5264 { vect_recog_widen_shift_pattern, "widen_shift" },
5265 { vect_recog_rotate_pattern, "rotate" },
5266 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5267 { vect_recog_divmod_pattern, "divmod" },
5268 { vect_recog_mult_pattern, "mult" },
5269 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5270 { vect_recog_bool_pattern, "bool" },
5271 /* This must come before mask conversion, and includes the parts
5272 of mask conversion that are needed for gather and scatter
5273 internal functions. */
5274 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5275 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5276 { vect_recog_widen_plus_pattern, "widen_plus" },
5277 { vect_recog_widen_minus_pattern, "widen_minus" },
5280 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
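/* Illustrative example of why the ordering above matters (an editor's
   sketch, not part of the sources): in a dot-product loop such as

     for (i = 0; i < n; i++)
       sum += (int) a[i] * (int) b[i];

   the product may be computed in an intermediate type and widened before
   being accumulated, in which case the plain widen_sum recognizer could
   also match the final addition.  Listing dot_prod (and sad) before
   widen_sum ensures the more profitable pattern is tried first.  */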
5282 /* Mark statements that are involved in a pattern. */
5284 static inline void
5285 vect_mark_pattern_stmts (vec_info *vinfo,
5286 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5287 tree pattern_vectype)
5289 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5290 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5292 gimple *orig_pattern_stmt = NULL;
5293 if (is_pattern_stmt_p (orig_stmt_info))
5295 /* We're replacing a statement in an existing pattern definition
5296 sequence. */
5297 orig_pattern_stmt = orig_stmt_info->stmt;
5298 if (dump_enabled_p ())
5299 dump_printf_loc (MSG_NOTE, vect_location,
5300 "replacing earlier pattern %G", orig_pattern_stmt);
5302 /* To keep the book-keeping simple, just swap the lhs of the
5303 old and new statements, so that the old one has a valid but
5304 unused lhs. */
5305 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5306 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5307 gimple_set_lhs (pattern_stmt, old_lhs);
5309 if (dump_enabled_p ())
5310 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5312 /* Switch to the statement that ORIG replaces. */
5313 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5315 /* We shouldn't be replacing the main pattern statement. */
5316 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5317 != orig_pattern_stmt);
5320 if (def_seq)
5321 for (gimple_stmt_iterator si = gsi_start (def_seq);
5322 !gsi_end_p (si); gsi_next (&si))
5324 if (dump_enabled_p ())
5325 dump_printf_loc (MSG_NOTE, vect_location,
5326 "extra pattern stmt: %G", gsi_stmt (si));
5327 stmt_vec_info pattern_stmt_info
5328 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5329 orig_stmt_info, pattern_vectype);
5330 /* Stmts in the def sequence are not vectorizable cycle or
5331 induction defs; instead they should all be vect_internal_def
5332 feeding the main pattern stmt, which retains this def type. */
5333 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5336 if (orig_pattern_stmt)
5338 vect_init_pattern_stmt (vinfo, pattern_stmt,
5339 orig_stmt_info, pattern_vectype);
5341 /* Insert all the new pattern statements before the original one. */
5342 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5343 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5344 orig_def_seq);
5345 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5346 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5348 /* Remove the pattern statement that this new pattern replaces. */
5349 gsi_remove (&gsi, false);
5351 else
5352 vect_set_pattern_stmt (vinfo,
5353 pattern_stmt, orig_stmt_info, pattern_vectype);
5355 /* Transfer reduction path info to the pattern. */
5356 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5358 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5359 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info_saved));
5360 /* Search the pattern def sequence and the main pattern stmt. Note
5361 we may have inserted all into a containing pattern def sequence
5362 so the following is a bit awkward. */
5363 gimple_stmt_iterator si;
5364 gimple *s;
5365 if (def_seq)
5367 si = gsi_start (def_seq);
5368 s = gsi_stmt (si);
5369 gsi_next (&si);
5371 else
5373 si = gsi_none ();
5374 s = pattern_stmt;
5378 bool found = false;
5379 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5380 if (gimple_op (s, i) == lookfor)
5382 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5383 lookfor = gimple_get_lhs (s);
5384 found = true;
5385 break;
5387 if (s == pattern_stmt)
5389 if (!found && dump_enabled_p ())
5390 dump_printf_loc (MSG_NOTE, vect_location,
5391 "failed to update reduction index.\n");
5392 break;
5394 if (gsi_end_p (si))
5395 s = pattern_stmt;
5396 else
5398 s = gsi_stmt (si);
5399 if (s == pattern_stmt)
5400 /* Found the end inside a bigger pattern def seq. */
5401 si = gsi_none ();
5402 else
5403 gsi_next (&si);
5405 } while (1);
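/* Illustrative example of the reduction path transfer above (an editor's
   sketch, not part of the sources): if the statement being replaced was

     sum_1 = _5 + sum_0;          // reduction operand sum_0, REDUC_IDX 1

   and the new main pattern statement is

     patt_sum = DOT_PROD_EXPR <a_t, b_t, sum_0>;

   the loop searches the pattern def sequence and the main pattern statement
   for SUM_0 and records its new operand index there, so the reduction chain
   is still recognizable after the replacement.  */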
5409 /* Function vect_pattern_recog_1
5411 Input:
5412 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5413 computation pattern.
5414 STMT_INFO: A stmt from which the pattern search should start.
5416 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5417 a sequence of statements that has the same functionality and can be
5418 used to replace STMT_INFO. It returns the last statement in the sequence
5419 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5420 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5421 statement, having first checked that the target supports the new operation
5422 in that type.
5424 This function also does some bookkeeping, as explained in the documentation
5425 for vect_pattern_recog. */
5427 static void
5428 vect_pattern_recog_1 (vec_info *vinfo,
5429 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5431 gimple *pattern_stmt;
5432 loop_vec_info loop_vinfo;
5433 tree pattern_vectype;
5435 /* If this statement has already been replaced with pattern statements,
5436 leave the original statement alone, since the first match wins.
5437 Instead try to match against the definition statements that feed
5438 the main pattern statement. */
5439 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5441 gimple_stmt_iterator gsi;
5442 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5443 !gsi_end_p (gsi); gsi_next (&gsi))
5444 vect_pattern_recog_1 (vinfo, recog_func,
5445 vinfo->lookup_stmt (gsi_stmt (gsi)));
5446 return;
5449 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5450 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5451 if (!pattern_stmt)
5453 /* Clear any half-formed pattern definition sequence. */
5454 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5455 return;
5458 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5459 gcc_assert (pattern_vectype);
5461 /* Found a vectorizable pattern. */
5462 if (dump_enabled_p ())
5463 dump_printf_loc (MSG_NOTE, vect_location,
5464 "%s pattern recognized: %G",
5465 recog_func->name, pattern_stmt);
5467 /* Mark the stmts that are involved in the pattern. */
5468 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5470 /* Patterns cannot be vectorized using SLP, because they change the order of
5471 computation. */
5472 if (loop_vinfo)
5474 unsigned ix, ix2;
5475 stmt_vec_info *elem_ptr;
5476 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5477 elem_ptr, *elem_ptr == stmt_info);
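/* Illustrative example of what a recognizer returns (an editor's sketch,
   not part of the sources): for unsigned char operands, a rounding average

     res = (a + b + 1) >> 1;

   is matched by vect_recog_average_pattern, which builds something like

     patt_res = .AVG_CEIL (a, b);

   returns that statement, records the vector type in *TYPE_OUT, and leaves
   any helper statements it created in the pattern def sequence.  */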
5482 /* Function vect_pattern_recog
5484 Input:
5485 VINFO - a vec_info describing the loop or basic-block region in which
5486 we want to look for computation idioms.
5488 Output - for each computation idiom that is detected we create a new stmt
5489 that provides the same functionality and that can be vectorized. We
5490 also record some information in the stmt_vec_info of the relevant
5491 stmts, as explained below:
5493 At the entry to this function we have the following stmts, with the
5494 following initial value in the STMT_VINFO fields:
5496 stmt in_pattern_p related_stmt vec_stmt
5497 S1: a_i = .... - - -
5498 S2: a_2 = ..use(a_i).. - - -
5499 S3: a_1 = ..use(a_2).. - - -
5500 S4: a_0 = ..use(a_1).. - - -
5501 S5: ... = ..use(a_0).. - - -
5503 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5504 represented by a single stmt. We then:
5505 - create a new stmt S6 equivalent to the pattern (the stmt is not
5506 inserted into the code)
5507 - fill in the STMT_VINFO fields as follows:
5509 in_pattern_p related_stmt vec_stmt
5510 S1: a_i = .... - - -
5511 S2: a_2 = ..use(a_i).. - - -
5512 S3: a_1 = ..use(a_2).. - - -
5513 S4: a_0 = ..use(a_1).. true S6 -
5514 '---> S6: a_new = .... - S4 -
5515 S5: ... = ..use(a_0).. - - -
5517 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5518 to each other through the RELATED_STMT field).
5520 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5521 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5522 remain irrelevant unless used by stmts other than S4.
5524 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5525 (because they are marked as irrelevant). It will vectorize S6, and record
5526 a pointer to the new vector stmt VS6 from S6 (as usual).
5527 S4 will be skipped, and S5 will be vectorized as usual:
5529 in_pattern_p related_stmt vec_stmt
5530 S1: a_i = .... - - -
5531 S2: a_2 = ..use(a_i).. - - -
5532 S3: a_1 = ..use(a_2).. - - -
5533 > VS6: va_new = .... - - -
5534 S4: a_0 = ..use(a_1).. true S6 VS6
5535 '---> S6: a_new = .... - S4 VS6
5536 > VS5: ... = ..vuse(va_new).. - - -
5537 S5: ... = ..use(a_0).. - - -
5539 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5540 elsewhere), and we'll end up with:
5542 VS6: va_new = ....
5543 VS5: ... = ..vuse(va_new)..
5545 In the case of more than one pattern statement, e.g., widen-mult with
5546 intermediate type:
5548 S1 a_t = ;
5549 S2 a_T = (TYPE) a_t;
5550 '--> S3: a_it = (interm_type) a_t;
5551 S4 prod_T = a_T * CONST;
5552 '--> S5: prod_T' = a_it w* CONST;
5554 there may be other users of a_T outside the pattern. In that case S2 will
5555 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5556 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5557 be recorded in S3. */
5559 void
5560 vect_pattern_recog (vec_info *vinfo)
5562 class loop *loop;
5563 basic_block *bbs;
5564 unsigned int nbbs;
5565 gimple_stmt_iterator si;
5566 unsigned int i, j;
5568 vect_determine_precisions (vinfo);
5570 DUMP_VECT_SCOPE ("vect_pattern_recog");
5572 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5574 loop = LOOP_VINFO_LOOP (loop_vinfo);
5575 bbs = LOOP_VINFO_BBS (loop_vinfo);
5576 nbbs = loop->num_nodes;
5578 /* Scan through the loop stmts, applying the pattern recognition
5579 functions starting at each stmt visited: */
5580 for (i = 0; i < nbbs; i++)
5582 basic_block bb = bbs[i];
5583 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5585 if (is_gimple_debug (gsi_stmt (si)))
5586 continue;
5587 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5588 /* Scan over all generic vect_recog_xxx_pattern functions. */
5589 for (j = 0; j < NUM_PATTERNS; j++)
5590 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5591 stmt_info);
5595 else
5597 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5598 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5599 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5600 !gsi_end_p (gsi); gsi_next (&gsi))
5602 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5603 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5604 continue;
5606 /* Scan over all generic vect_recog_xxx_pattern functions. */
5607 for (j = 0; j < NUM_PATTERNS; j++)
5608 vect_pattern_recog_1 (vinfo,
5609 &vect_vect_recog_func_ptrs[j], stmt_info);
5613 /* After this no more add_stmt calls are allowed. */
5614 vinfo->stmt_vec_info_ro = true;