gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
54 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
56 static bool
57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
59 value_range vr;
60 get_range_query (cfun)->range_of_expr (vr, var);
61 if (vr.undefined_p ())
62 vr.set_varying (TREE_TYPE (var));
63 *min_value = wi::to_wide (vr.min ());
64 *max_value = wi::to_wide (vr.max ());
65 value_range_kind vr_type = vr.kind ();
66 wide_int nonzero = get_nonzero_bits (var);
67 signop sgn = TYPE_SIGN (TREE_TYPE (var));
68 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
69 nonzero, sgn) == VR_RANGE)
71 if (dump_enabled_p ())
73 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
74 dump_printf (MSG_NOTE, " has range [");
75 dump_hex (MSG_NOTE, *min_value);
76 dump_printf (MSG_NOTE, ", ");
77 dump_hex (MSG_NOTE, *max_value);
78 dump_printf (MSG_NOTE, "]\n");
80 return true;
82 else
84 if (dump_enabled_p ())
86 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 dump_printf (MSG_NOTE, " has no range info\n");
89 return false;
93 /* Report that we've found an instance of pattern PATTERN in
94 statement STMT. */
96 static void
97 vect_pattern_detected (const char *name, gimple *stmt)
99 if (dump_enabled_p ())
100 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104 return the pattern statement's stmt_vec_info. Set its vector type to
105 VECTYPE if it doesn't have one already. */
107 static stmt_vec_info
108 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
109 stmt_vec_info orig_stmt_info, tree vectype)
111 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
112 if (pattern_stmt_info == NULL)
113 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
114 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
116 pattern_stmt_info->pattern_stmt_p = true;
117 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
118 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
119 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
120 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
122 gcc_assert (!vectype
123 || (VECTOR_BOOLEAN_TYPE_P (vectype)
124 == vect_use_mask_type_p (orig_stmt_info)));
125 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
126 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
128 return pattern_stmt_info;
131 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
132 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
133 have one already. */
135 static void
136 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
137 stmt_vec_info orig_stmt_info, tree vectype)
139 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
140 STMT_VINFO_RELATED_STMT (orig_stmt_info)
141 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
144 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
145 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
146 be different from the vector type of the final pattern statement.
147 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
148 from which it was derived. */
150 static inline void
151 append_pattern_def_seq (vec_info *vinfo,
152 stmt_vec_info stmt_info, gimple *new_stmt,
153 tree vectype = NULL_TREE,
154 tree scalar_type_for_mask = NULL_TREE)
156 gcc_assert (!scalar_type_for_mask
157 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
158 if (vectype)
160 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
161 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
162 if (scalar_type_for_mask)
163 new_stmt_info->mask_precision
164 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
166 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
167 new_stmt);
170 /* The caller wants to perform new operations on vect_external variable
171 VAR, so that the result of the operations would also be vect_external.
172 Return the edge on which the operations can be performed, if one exists.
173 Return null if the operations should instead be treated as part of
174 the pattern that needs them. */
176 static edge
177 vect_get_external_def_edge (vec_info *vinfo, tree var)
179 edge e = NULL;
180 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
182 e = loop_preheader_edge (loop_vinfo->loop);
183 if (!SSA_NAME_IS_DEFAULT_DEF (var))
185 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
186 if (bb == NULL
187 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
188 e = NULL;
191 return e;
194 /* Return true if the target supports a vector version of CODE,
195 where CODE is known to map to a direct optab with the given SUBTYPE.
196 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
197 specifies the type of the scalar result.
199 If CODE allows the inputs and outputs to have different types
200 (such as for WIDEN_SUM_EXPR), it is the input mode rather
201 than the output mode that determines the appropriate target pattern.
202 Operand 0 of the target pattern then specifies the mode that the output
203 must have.
205 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
206 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
207 is nonnull. */
209 static bool
210 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
211 tree itype, tree *vecotype_out,
212 tree *vecitype_out = NULL,
213 enum optab_subtype subtype = optab_default)
215 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
216 if (!vecitype)
217 return false;
219 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
220 if (!vecotype)
221 return false;
223 optab optab = optab_for_tree_code (code, vecitype, subtype);
224 if (!optab)
225 return false;
227 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
228 if (icode == CODE_FOR_nothing
229 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
230 return false;
232 *vecotype_out = vecotype;
233 if (vecitype_out)
234 *vecitype_out = vecitype;
235 return true;
238 /* Round bit precision PRECISION up to a full element. */
240 static unsigned int
241 vect_element_precision (unsigned int precision)
243 precision = 1 << ceil_log2 (precision);
244 return MAX (precision, BITS_PER_UNIT);
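/* Editor's note: a worked example, not part of the original source.
   vect_element_precision (13) == 16 and vect_element_precision (3) == 8:
   the precision is first rounded up to a power of two and then raised
   to at least BITS_PER_UNIT (assumed to be 8 here).  */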
247 /* If OP is defined by a statement that's being considered for vectorization,
248 return information about that statement, otherwise return NULL. */
250 static stmt_vec_info
251 vect_get_internal_def (vec_info *vinfo, tree op)
253 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
254 if (def_stmt_info
255 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
256 return def_stmt_info;
257 return NULL;
260 /* Check whether NAME, an ssa-name used in STMT_VINFO,
261 is a result of a type promotion, such that:
262 DEF_STMT: NAME = NOP (name0)
263 If CHECK_SIGN is TRUE, check that either both types are signed or both are
264 unsigned. */
266 static bool
267 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
268 tree *orig_type, gimple **def_stmt, bool *promotion)
270 tree type = TREE_TYPE (name);
271 tree oprnd0;
272 enum vect_def_type dt;
274 stmt_vec_info def_stmt_info;
275 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
276 return false;
278 if (dt != vect_internal_def
279 && dt != vect_external_def && dt != vect_constant_def)
280 return false;
282 if (!*def_stmt)
283 return false;
285 if (!is_gimple_assign (*def_stmt))
286 return false;
288 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
289 return false;
291 oprnd0 = gimple_assign_rhs1 (*def_stmt);
293 *orig_type = TREE_TYPE (oprnd0);
294 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
295 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
296 return false;
298 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
299 *promotion = true;
300 else
301 *promotion = false;
303 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
304 return false;
306 return true;
309 /* Holds information about an input operand after some sign changes
310 and type promotions have been peeled away. */
311 class vect_unpromoted_value {
312 public:
313 vect_unpromoted_value ();
315 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
317 /* The value obtained after peeling away zero or more casts. */
318 tree op;
320 /* The type of OP. */
321 tree type;
323 /* The definition type of OP. */
324 vect_def_type dt;
326 /* If OP is the result of peeling at least one cast, and if the cast
327 of OP itself is a vectorizable statement, CASTER identifies that
328 statement, otherwise it is null. */
329 stmt_vec_info caster;
332 inline vect_unpromoted_value::vect_unpromoted_value ()
333 : op (NULL_TREE),
334 type (NULL_TREE),
335 dt (vect_uninitialized_def),
336 caster (NULL)
340 /* Set the operand to OP_IN, its definition type to DT_IN, and the
341 statement that casts it to CASTER_IN. */
343 inline void
344 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
345 stmt_vec_info caster_in)
347 op = op_in;
348 type = TREE_TYPE (op);
349 dt = dt_in;
350 caster = caster_in;
353 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
354 to reach some vectorizable inner operand OP', continuing as long as it
355 is possible to convert OP' back to OP using a possible sign change
356 followed by a possible promotion P. Return this OP', or null if OP is
357 not a vectorizable SSA name. If there is a promotion P, describe its
358 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
359 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
360 have more than one user.
362 A successful return means that it is possible to go from OP' to OP
363 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
364 whereas the cast from UNPROM to OP might be a promotion, a sign
365 change, or a nop.
367 E.g. say we have:
369 signed short *ptr = ...;
370 signed short C = *ptr;
371 unsigned short B = (unsigned short) C; // sign change
372 signed int A = (signed int) B; // unsigned promotion
373 ...possible other uses of A...
374 unsigned int OP = (unsigned int) A; // sign change
376 In this case it's possible to go directly from C to OP using:
378 OP = (unsigned int) (unsigned short) C;
379 +------------+ +--------------+
380 promotion sign change
382 so OP' would be C. The input to the promotion is B, so UNPROM
383 would describe B. */
385 static tree
386 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
387 vect_unpromoted_value *unprom,
388 bool *single_use_p = NULL)
390 tree res = NULL_TREE;
391 tree op_type = TREE_TYPE (op);
392 unsigned int orig_precision = TYPE_PRECISION (op_type);
393 unsigned int min_precision = orig_precision;
394 stmt_vec_info caster = NULL;
395 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
397 /* See whether OP is simple enough to vectorize. */
398 stmt_vec_info def_stmt_info;
399 gimple *def_stmt;
400 vect_def_type dt;
401 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
402 break;
404 /* If OP is the input of a demotion, skip over it to see whether
405 OP is itself the result of a promotion. If so, the combined
406 effect of the promotion and the demotion might fit the required
407 pattern, otherwise neither operation fits.
409 This copes with cases such as the result of an arithmetic
410 operation being truncated before being stored, and where that
411 arithmetic operation has been recognized as an over-widened one. */
412 if (TYPE_PRECISION (op_type) <= min_precision)
414 /* Use OP as the UNPROM described above if we haven't yet
415 found a promotion, or if using the new input preserves the
416 sign of the previous promotion. */
417 if (!res
418 || TYPE_PRECISION (unprom->type) == orig_precision
419 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
421 unprom->set_op (op, dt, caster);
422 min_precision = TYPE_PRECISION (op_type);
424 /* Stop if we've already seen a promotion and if this
425 conversion does more than change the sign. */
426 else if (TYPE_PRECISION (op_type)
427 != TYPE_PRECISION (unprom->type))
428 break;
430 /* The sequence now extends to OP. */
431 res = op;
434 /* See whether OP is defined by a cast. Record it as CASTER if
435 the cast is potentially vectorizable. */
436 if (!def_stmt)
437 break;
438 caster = def_stmt_info;
440 /* Ignore pattern statements, since we don't link uses for them. */
441 if (caster
442 && single_use_p
443 && !STMT_VINFO_RELATED_STMT (caster)
444 && !has_single_use (res))
445 *single_use_p = false;
447 gassign *assign = dyn_cast <gassign *> (def_stmt);
448 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
449 break;
451 /* Continue with the input to the cast. */
452 op = gimple_assign_rhs1 (def_stmt);
453 op_type = TREE_TYPE (op);
455 return res;
458 /* OP is an integer operand to an operation that returns TYPE, and we
459 want to treat the operation as a widening one. So far we can treat
460 it as widening from *COMMON_TYPE.
462 Return true if OP is suitable for such a widening operation,
463 either widening from *COMMON_TYPE or from some supertype of it.
464 Update *COMMON_TYPE to the supertype in the latter case.
466 SHIFT_P is true if OP is a shift amount. */
468 static bool
469 vect_joust_widened_integer (tree type, bool shift_p, tree op,
470 tree *common_type)
472 /* Calculate the minimum precision required by OP, without changing
473 the sign of either operand. */
474 unsigned int precision;
475 if (shift_p)
477 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
478 return false;
479 precision = TREE_INT_CST_LOW (op);
481 else
483 precision = wi::min_precision (wi::to_widest (op),
484 TYPE_SIGN (*common_type));
485 if (precision * 2 > TYPE_PRECISION (type))
486 return false;
489 /* If OP requires a wider type, switch to that type. The checks
490 above ensure that this is still narrower than the result. */
491 precision = vect_element_precision (precision);
492 if (TYPE_PRECISION (*common_type) < precision)
493 *common_type = build_nonstandard_integer_type
494 (precision, TYPE_UNSIGNED (*common_type));
495 return true;
498 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
499 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
501 static bool
502 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
504 if (types_compatible_p (*common_type, new_type))
505 return true;
507 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
508 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
509 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
510 return true;
512 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
513 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
514 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
516 *common_type = new_type;
517 return true;
520 /* We have mismatched signs, with the signed type being
521 no wider than the unsigned type. In this case we need
522 a wider signed type. */
523 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
524 TYPE_PRECISION (new_type));
525 precision *= 2;
527 if (precision * 2 > TYPE_PRECISION (type))
528 return false;
530 *common_type = build_nonstandard_integer_type (precision, false);
531 return true;
534 /* Check whether STMT_INFO can be viewed as a tree of integer operations
535 in which each node either performs CODE or WIDENED_CODE, and where
536 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
537 specifies the maximum number of leaf operands. SHIFT_P says whether
538 CODE and WIDENED_CODE are some sort of shift.
540 If STMT_INFO is such a tree, return the number of leaf operands
541 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
542 to a type that (a) is narrower than the result of STMT_INFO and
543 (b) can hold all leaf operand values.
545 If SUBTYPE is nonnull, allow the signs of the operands to differ
546 but not their precision. SUBTYPE is updated to reflect
547 this.
549 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
550 exists. */
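/* Editor's note: a small illustration, not part of the original comment.
   With CODE = PLUS_EXPR and MAX_NOPS = 4, a tree such as

     ((int) a + (int) b) + ((int) c + (int) d)

   with a, b, c and d of type signed short has four leaf operands;
   UNPROM[0..3] would describe a, b, c and d, and *COMMON_TYPE would be
   signed short (assuming all four inputs share that type).  */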
552 static unsigned int
553 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
554 tree_code widened_code, bool shift_p,
555 unsigned int max_nops,
556 vect_unpromoted_value *unprom, tree *common_type,
557 enum optab_subtype *subtype = NULL)
559 /* Check for an integer operation with the right code. */
560 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
561 if (!assign)
562 return 0;
564 tree_code rhs_code = gimple_assign_rhs_code (assign);
565 if (rhs_code != code && rhs_code != widened_code)
566 return 0;
568 tree type = TREE_TYPE (gimple_assign_lhs (assign));
569 if (!INTEGRAL_TYPE_P (type))
570 return 0;
572 /* Assume that both operands will be leaf operands. */
573 max_nops -= 2;
575 /* Check the operands. */
576 unsigned int next_op = 0;
577 for (unsigned int i = 0; i < 2; ++i)
579 vect_unpromoted_value *this_unprom = &unprom[next_op];
580 unsigned int nops = 1;
581 tree op = gimple_op (assign, i + 1);
582 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
584 /* We already have a common type from earlier operands.
585 Update it to account for OP. */
586 this_unprom->set_op (op, vect_constant_def);
587 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
588 return 0;
590 else
592 /* Only allow shifts by constants. */
593 if (shift_p && i == 1)
594 return 0;
596 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
597 return 0;
599 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
601 /* The operand isn't widened. If STMT_INFO has the code
602 for an unwidened operation, recursively check whether
603 this operand is a node of the tree. */
604 if (rhs_code != code
605 || max_nops == 0
606 || this_unprom->dt != vect_internal_def)
607 return 0;
609 /* Give back the leaf slot allocated above now that we're
610 not treating this as a leaf operand. */
611 max_nops += 1;
613 /* Recursively process the definition of the operand. */
614 stmt_vec_info def_stmt_info
615 = vinfo->lookup_def (this_unprom->op);
616 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
617 widened_code, shift_p, max_nops,
618 this_unprom, common_type,
619 subtype);
620 if (nops == 0)
621 return 0;
623 max_nops -= nops;
625 else
627 /* Make sure that the operand is narrower than the result. */
628 if (TYPE_PRECISION (this_unprom->type) * 2
629 > TYPE_PRECISION (type))
630 return 0;
632 /* Update COMMON_TYPE for the new operand. */
633 if (i == 0)
634 *common_type = this_unprom->type;
635 else if (!vect_joust_widened_type (type, this_unprom->type,
636 common_type))
638 if (subtype)
640 /* See if we can sign extend the smaller type. */
641 if (TYPE_PRECISION (this_unprom->type)
642 > TYPE_PRECISION (*common_type))
643 *common_type = this_unprom->type;
644 *subtype = optab_vector_mixed_sign;
646 else
647 return 0;
651 next_op += nops;
653 return next_op;
656 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
657 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
659 static tree
660 vect_recog_temp_ssa_var (tree type, gimple *stmt)
662 return make_temp_ssa_name (type, stmt, "patt");
665 /* STMT2_INFO describes a type conversion that could be split into STMT1
666 followed by a version of STMT2_INFO that takes NEW_RHS as its first
667 input. Try to do this using pattern statements, returning true on
668 success. */
670 static bool
671 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
672 gimple *stmt1, tree vectype)
674 if (is_pattern_stmt_p (stmt2_info))
676 /* STMT2_INFO is part of a pattern. Get the statement to which
677 the pattern is attached. */
678 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
679 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE, vect_location,
683 "Splitting pattern statement: %G", stmt2_info->stmt);
685 /* Since STMT2_INFO is a pattern statement, we can change it
686 in-situ without worrying about changing the code for the
687 containing block. */
688 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
690 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
693 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
694 stmt2_info->stmt);
697 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
698 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
699 /* STMT2_INFO is the actual pattern statement. Add STMT1
700 to the end of the definition sequence. */
701 gimple_seq_add_stmt_without_update (def_seq, stmt1);
702 else
704 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
705 before it. */
706 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
707 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
709 return true;
711 else
713 /* STMT2_INFO doesn't yet have a pattern. Try to create a
714 two-statement pattern now. */
715 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
716 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
717 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
718 if (!lhs_vectype)
719 return false;
721 if (dump_enabled_p ())
722 dump_printf_loc (MSG_NOTE, vect_location,
723 "Splitting statement: %G", stmt2_info->stmt);
725 /* Add STMT1 as a singleton pattern definition sequence. */
726 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
727 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
728 gimple_seq_add_stmt_without_update (def_seq, stmt1);
730 /* Build the second of the two pattern statements. */
731 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
732 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
733 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
735 if (dump_enabled_p ())
737 dump_printf_loc (MSG_NOTE, vect_location,
738 "into pattern statements: %G", stmt1);
739 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
742 return true;
746 /* Convert UNPROM to TYPE and return the result, adding new statements
747 to STMT_INFO's pattern definition statements if no better way is
748 available. VECTYPE is the vector form of TYPE.
750 If SUBTYPE then convert the type based on the subtype. */
752 static tree
753 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
754 vect_unpromoted_value *unprom, tree vectype,
755 enum optab_subtype subtype = optab_default)
758 /* Update the type if the signs differ. */
759 if (subtype == optab_vector_mixed_sign
760 && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
761 type = build_nonstandard_integer_type (TYPE_PRECISION (type),
762 TYPE_SIGN (unprom->type));
764 /* Check for a no-op conversion. */
765 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
766 return unprom->op;
768 /* Allow the caller to create constant vect_unpromoted_values. */
769 if (TREE_CODE (unprom->op) == INTEGER_CST)
770 return wide_int_to_tree (type, wi::to_widest (unprom->op));
772 tree input = unprom->op;
773 if (unprom->caster)
775 tree lhs = gimple_get_lhs (unprom->caster->stmt);
776 tree lhs_type = TREE_TYPE (lhs);
778 /* If the result of the existing cast is the right width, use it
779 instead of the source of the cast. */
780 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
781 input = lhs;
782 /* If the precision we want is between the source and result
783 precisions of the existing cast, try splitting the cast into
784 two and tapping into a mid-way point. */
785 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
786 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
788 /* In order to preserve the semantics of the original cast,
789 give the mid-way point the same signedness as the input value.
791 It would be possible to use a signed type here instead if
792 TYPE is signed and UNPROM->TYPE is unsigned, but that would
793 make the sign of the midtype sensitive to the order in
794 which we process the statements, since the signedness of
795 TYPE is the signedness required by just one of possibly
796 many users. Also, unsigned promotions are usually as cheap
797 as or cheaper than signed ones, so it's better to keep an
798 unsigned promotion. */
799 tree midtype = build_nonstandard_integer_type
800 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
801 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
802 if (vec_midtype)
804 input = vect_recog_temp_ssa_var (midtype, NULL);
805 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
806 unprom->op);
807 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
808 vec_midtype))
809 append_pattern_def_seq (vinfo, stmt_info,
810 new_stmt, vec_midtype);
814 /* See if we can reuse an existing result. */
815 if (types_compatible_p (type, TREE_TYPE (input)))
816 return input;
819 /* We need a new conversion statement. */
820 tree new_op = vect_recog_temp_ssa_var (type, NULL);
821 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
823 /* If OP is an external value, see if we can insert the new statement
824 on an incoming edge. */
825 if (input == unprom->op && unprom->dt == vect_external_def)
826 if (edge e = vect_get_external_def_edge (vinfo, input))
828 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
829 gcc_assert (!new_bb);
830 return new_op;
833 /* As a (common) last resort, add the statement to the pattern itself. */
834 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
835 return new_op;
838 /* Invoke vect_convert_input for N elements of UNPROM and store the
839 result in the corresponding elements of RESULT.
841 If SUBTYPE then convert the type based on the subtype. */
843 static void
844 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
845 tree *result, tree type, vect_unpromoted_value *unprom,
846 tree vectype, enum optab_subtype subtype = optab_default)
848 for (unsigned int i = 0; i < n; ++i)
850 unsigned int j;
851 for (j = 0; j < i; ++j)
852 if (unprom[j].op == unprom[i].op)
853 break;
855 if (j < i)
856 result[i] = result[j];
857 else
858 result[i] = vect_convert_input (vinfo, stmt_info,
859 type, &unprom[i], vectype, subtype);
863 /* The caller has created a (possibly empty) sequence of pattern definition
864 statements followed by a single statement PATTERN_STMT. Cast the result
865 of this final statement to TYPE. If a new statement is needed, add
866 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
867 and return the new statement, otherwise return PATTERN_STMT as-is.
868 VECITYPE is the vector form of PATTERN_STMT's result type. */
870 static gimple *
871 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
872 gimple *pattern_stmt, tree vecitype)
874 tree lhs = gimple_get_lhs (pattern_stmt);
875 if (!types_compatible_p (type, TREE_TYPE (lhs)))
877 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
878 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
879 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
881 return pattern_stmt;
884 /* Return true if STMT_VINFO describes a reduction for which reassociation
885 is allowed. If STMT_INFO is part of a group, assume that it's part of
886 a reduction chain and optimistically assume that all statements
887 except the last allow reassociation.
888 Also require it to have code CODE and to be a reduction
889 in the outermost loop. When returning true, store the operands in
890 *OP0_OUT and *OP1_OUT. */
892 static bool
893 vect_reassociating_reduction_p (vec_info *vinfo,
894 stmt_vec_info stmt_info, tree_code code,
895 tree *op0_out, tree *op1_out)
897 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
898 if (!loop_info)
899 return false;
901 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
902 if (!assign || gimple_assign_rhs_code (assign) != code)
903 return false;
905 /* We don't allow changing the order of the computation in the inner-loop
906 when doing outer-loop vectorization. */
907 class loop *loop = LOOP_VINFO_LOOP (loop_info);
908 if (loop && nested_in_vect_loop_p (loop, stmt_info))
909 return false;
911 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
913 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
914 code))
915 return false;
917 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
918 return false;
920 *op0_out = gimple_assign_rhs1 (assign);
921 *op1_out = gimple_assign_rhs2 (assign);
922 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
923 std::swap (*op0_out, *op1_out);
924 return true;
927 /* match.pd function to match
928 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
929 with conditions:
930 1) @1, @2, c, d, a, b all have integral type.
931 2) There's a single use of both @1 and @2.
932 3) a and c have the same precision.
933 4) c and @1 have different precision.
934 5) c and d have the same type, or they may differ in sign when the
935 conversion is a truncation.
937 Record a, c, d and @3.
939 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
941 /* Function vect_recog_cond_expr_convert
943 Try to find the following pattern:
945 TYPE_AB A,B;
946 TYPE_CD C,D;
947 TYPE_E E;
948 TYPE_E op_true = (TYPE_E) A;
949 TYPE_E op_false = (TYPE_E) B;
951 E = C cmp D ? op_true : op_false;
953 where
954 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
955 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
956 single_use of op_true and op_false.
957 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
959 Input:
961 * STMT_VINFO: The stmt from which the pattern search begins.
962 here it starts with E = C cmp D ? op_true : op_false;
964 Output:
966 TYPE1 E' = C cmp D ? A : B;
967 TYPE3 E = (TYPE3) E';
969 There may be an extra nop_convert for A or B to handle different signedness.
971 * TYPE_OUT: The vector type of the output of this pattern.
973 * Return value: A new stmt that will be used to replace the sequence of
974 stmts that constitute the pattern. In this case it will be:
975 E = (TYPE3)E';
976 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
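/* Editor's note: a minimal source-level example, not from the original
   comment, assuming 32-bit int (TYPE_AB/TYPE_CD) and 64-bit long (TYPE_E):

     int a[N], b[N], c[N], d[N];
     long e[N];
     for (int i = 0; i < N; i++)
       e[i] = c[i] < d[i] ? (long) a[i] : (long) b[i];

   The COND_EXPR is then performed on int elements and only the selected
   value is widened to long, instead of widening both arms first.  */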
978 static gimple *
979 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
980 stmt_vec_info stmt_vinfo, tree *type_out)
982 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
983 tree lhs, match[4], temp, type, new_lhs, op2;
984 gimple *cond_stmt;
985 gimple *pattern_stmt;
987 if (!last_stmt)
988 return NULL;
990 lhs = gimple_assign_lhs (last_stmt);
992 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
993 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
994 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
995 return NULL;
997 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
999 op2 = match[2];
1000 type = TREE_TYPE (match[1]);
1001 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1003 op2 = vect_recog_temp_ssa_var (type, NULL);
1004 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1005 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1006 get_vectype_for_scalar_type (vinfo, type));
1009 temp = vect_recog_temp_ssa_var (type, NULL);
1010 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1011 match[1], op2));
1012 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1013 get_vectype_for_scalar_type (vinfo, type));
1014 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1015 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1016 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "created pattern stmt: %G", pattern_stmt);
1021 return pattern_stmt;
1024 /* Function vect_recog_dot_prod_pattern
1026 Try to find the following pattern:
1028 type1a x_t;
1029 type1b y_t;
1030 TYPE1 prod;
1031 TYPE2 sum = init;
1032 loop:
1033 sum_0 = phi <init, sum_1>
1034 S1 x_t = ...
1035 S2 y_t = ...
1036 S3 x_T = (TYPE1) x_t;
1037 S4 y_T = (TYPE1) y_t;
1038 S5 prod = x_T * y_T;
1039 [S6 prod = (TYPE2) prod; #optional]
1040 S7 sum_1 = prod + sum_0;
1042 where 'TYPE1' is exactly double the size of the types 'type1a' and 'type1b';
1043 the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b', but
1044 'type1a' and 'type1b' may differ in sign from each other.
1046 Input:
1048 * STMT_VINFO: The stmt from which the pattern search begins. In the
1049 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1050 will be detected.
1052 Output:
1054 * TYPE_OUT: The type of the output of this pattern.
1056 * Return value: A new stmt that will be used to replace the sequence of
1057 stmts that constitute the pattern. In this case it will be:
1058 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1060 Note: The dot-prod idiom is a widening reduction pattern that is
1061 vectorized without preserving all the intermediate results. It
1062 produces only N/2 (widened) results (by summing up pairs of
1063 intermediate results) rather than all N results. Therefore, we
1064 cannot allow this pattern when we want to get all the results and in
1065 the correct order (as is the case when this computation is in an
1066 inner-loop nested in an outer-loop that is being vectorized). */
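/* Editor's note: a typical source loop for this pattern, not part of the
   original comment, assuming signed char inputs and an int accumulator:

     signed char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];

   When the target provides a suitable dot-product instruction for the
   chosen vector types, the multiply-and-accumulate body becomes a single
   DOT_PROD_EXPR per vector iteration.  */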
1068 static gimple *
1069 vect_recog_dot_prod_pattern (vec_info *vinfo,
1070 stmt_vec_info stmt_vinfo, tree *type_out)
1072 tree oprnd0, oprnd1;
1073 gimple *last_stmt = stmt_vinfo->stmt;
1074 tree type, half_type;
1075 gimple *pattern_stmt;
1076 tree var;
1078 /* Look for the following pattern
1079 DX = (TYPE1) X;
1080 DY = (TYPE1) Y;
1081 DPROD = DX * DY;
1082 DDPROD = (TYPE2) DPROD;
1083 sum_1 = DDPROD + sum_0;
1084 In which
1085 - DX is double the size of X
1086 - DY is double the size of Y
1087 - DX, DY, DPROD all have the same type but the sign
1088 between X, Y and DPROD can differ.
1089 - sum is the same size as DPROD or bigger
1090 - sum has been recognized as a reduction variable.
1092 This is equivalent to:
1093 DPROD = X w* Y; #widen mult
1094 sum_1 = DPROD w+ sum_0; #widen summation
1096 DPROD = X w* Y; #widen mult
1097 sum_1 = DPROD + sum_0; #summation
1100 /* Starting from LAST_STMT, follow the defs of its uses in search
1101 of the above pattern. */
1103 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1104 &oprnd0, &oprnd1))
1105 return NULL;
1107 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1109 vect_unpromoted_value unprom_mult;
1110 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1112 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1113 we know that oprnd1 is the reduction variable (defined by a loop-header
1114 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1115 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1116 if (!oprnd0)
1117 return NULL;
1119 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1120 if (!mult_vinfo)
1121 return NULL;
1123 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1124 inside the loop (in case we are analyzing an outer-loop). */
1125 vect_unpromoted_value unprom0[2];
1126 enum optab_subtype subtype = optab_vector;
1127 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1128 false, 2, unprom0, &half_type, &subtype))
1129 return NULL;
1131 /* If there are two widening operations, make sure they agree on the sign
1132 of the extension. The result of an optab_vector_mixed_sign operation
1133 is signed; otherwise, the result has the same sign as the operands. */
1134 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1135 && (subtype == optab_vector_mixed_sign
1136 ? TYPE_UNSIGNED (unprom_mult.type)
1137 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1138 return NULL;
1140 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1142 tree half_vectype;
1143 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1144 type_out, &half_vectype, subtype))
1145 return NULL;
1147 /* Get the inputs in the appropriate types. */
1148 tree mult_oprnd[2];
1149 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1150 unprom0, half_vectype, subtype);
1152 var = vect_recog_temp_ssa_var (type, NULL);
1153 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1154 mult_oprnd[0], mult_oprnd[1], oprnd1);
1156 return pattern_stmt;
1160 /* Function vect_recog_sad_pattern
1162 Try to find the following Sum of Absolute Difference (SAD) pattern:
1164 type x_t, y_t;
1165 signed TYPE1 diff, abs_diff;
1166 TYPE2 sum = init;
1167 loop:
1168 sum_0 = phi <init, sum_1>
1169 S1 x_t = ...
1170 S2 y_t = ...
1171 S3 x_T = (TYPE1) x_t;
1172 S4 y_T = (TYPE1) y_t;
1173 S5 diff = x_T - y_T;
1174 S6 abs_diff = ABS_EXPR <diff>;
1175 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1176 S8 sum_1 = abs_diff + sum_0;
1178 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1179 same size as 'TYPE1' or bigger. This is a special case of a reduction
1180 computation.
1182 Input:
1184 * STMT_VINFO: The stmt from which the pattern search begins. In the
1185 example, when this function is called with S8, the pattern
1186 {S3,S4,S5,S6,S7,S8} will be detected.
1188 Output:
1190 * TYPE_OUT: The type of the output of this pattern.
1192 * Return value: A new stmt that will be used to replace the sequence of
1193 stmts that constitute the pattern. In this case it will be:
1194 SAD_EXPR <x_t, y_t, sum_0>
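/* Editor's note: the classic SAD kernel, not part of the original comment,
   assuming unsigned char pixels and an int accumulator:

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += __builtin_abs (x[i] - y[i]);

   On targets with a vector SAD instruction (x86's psadbw, for example)
   the subtract/abs/accumulate body becomes a single SAD_EXPR.  */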
1197 static gimple *
1198 vect_recog_sad_pattern (vec_info *vinfo,
1199 stmt_vec_info stmt_vinfo, tree *type_out)
1201 gimple *last_stmt = stmt_vinfo->stmt;
1202 tree half_type;
1204 /* Look for the following pattern
1205 DX = (TYPE1) X;
1206 DY = (TYPE1) Y;
1207 DDIFF = DX - DY;
1208 DAD = ABS_EXPR <DDIFF>;
1209 DAD = (TYPE2) DAD; #optional
1210 sum_1 = DAD + sum_0;
1211 In which
1212 - DX is at least double the size of X
1213 - DY is at least double the size of Y
1214 - DX, DY, DDIFF, DAD all have the same type
1215 - sum is the same size as DAD or bigger
1216 - sum has been recognized as a reduction variable.
1218 This is equivalent to:
1219 DDIFF = X w- Y; #widen sub
1220 DAD = ABS_EXPR <DDIFF>;
1221 sum_1 = DAD w+ sum_0; #widen summation
1223 DDIFF = X w- Y; #widen sub
1224 DAD = ABS_EXPR <DDIFF>;
1225 sum_1 = DAD + sum_0; #summation
1228 /* Starting from LAST_STMT, follow the defs of its uses in search
1229 of the above pattern. */
1231 tree plus_oprnd0, plus_oprnd1;
1232 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1233 &plus_oprnd0, &plus_oprnd1))
1234 return NULL;
1236 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1238 /* Any non-truncating sequence of conversions is OK here, since
1239 with a successful match, the result of the ABS(U) is known to fit
1240 within the nonnegative range of the result type. (It cannot be the
1241 negative of the minimum signed value due to the range of the widening
1242 MINUS_EXPR.) */
1243 vect_unpromoted_value unprom_abs;
1244 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1245 &unprom_abs);
1247 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1248 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1249 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1250 Then check that plus_oprnd0 is defined by an abs_expr. */
1252 if (!plus_oprnd0)
1253 return NULL;
1255 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1256 if (!abs_stmt_vinfo)
1257 return NULL;
1259 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1260 inside the loop (in case we are analyzing an outer-loop). */
1261 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1262 if (!abs_stmt
1263 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1264 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1265 return NULL;
1267 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1268 tree abs_type = TREE_TYPE (abs_oprnd);
1269 if (TYPE_UNSIGNED (abs_type))
1270 return NULL;
1272 /* Peel off conversions from the ABS input. This can involve sign
1273 changes (e.g. from an unsigned subtraction to a signed ABS input)
1274 or signed promotion, but it can't include unsigned promotion.
1275 (Note that ABS of an unsigned promotion should have been folded
1276 away before now anyway.) */
1277 vect_unpromoted_value unprom_diff;
1278 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1279 &unprom_diff);
1280 if (!abs_oprnd)
1281 return NULL;
1282 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1283 && TYPE_UNSIGNED (unprom_diff.type))
1284 return NULL;
1286 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1287 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1288 if (!diff_stmt_vinfo)
1289 return NULL;
1291 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1292 inside the loop (in case we are analyzing an outer-loop). */
1293 vect_unpromoted_value unprom[2];
1294 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1295 false, 2, unprom, &half_type))
1296 return NULL;
1298 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1300 tree half_vectype;
1301 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1302 type_out, &half_vectype))
1303 return NULL;
1305 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1306 tree sad_oprnd[2];
1307 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1308 unprom, half_vectype);
1310 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1311 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1312 sad_oprnd[1], plus_oprnd1);
1314 return pattern_stmt;
1317 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1318 so that it can be treated as though it had the form:
1320 A_TYPE a;
1321 B_TYPE b;
1322 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1323 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1324 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1325 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1326 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1328 Try to replace the pattern with:
1330 A_TYPE a;
1331 B_TYPE b;
1332 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1333 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1334 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1335 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1337 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1339 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1340 name of the pattern being matched, for dump purposes. */
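/* Editor's note: a concrete instance, not part of the original comment.
   For the MULT_EXPR/WIDEN_MULT_EXPR instantiation below, a matching loop is

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   where HALF_TYPE is short; the pattern lets the target use a widening
   multiply rather than unpacking both inputs to int first.  */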
1342 static gimple *
1343 vect_recog_widen_op_pattern (vec_info *vinfo,
1344 stmt_vec_info last_stmt_info, tree *type_out,
1345 tree_code orig_code, tree_code wide_code,
1346 bool shift_p, const char *name)
1348 gimple *last_stmt = last_stmt_info->stmt;
1350 vect_unpromoted_value unprom[2];
1351 tree half_type;
1352 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1353 shift_p, 2, unprom, &half_type))
1354 return NULL;
1356 /* Pattern detected. */
1357 vect_pattern_detected (name, last_stmt);
1359 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1360 tree itype = type;
1361 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1362 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1363 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1364 TYPE_UNSIGNED (half_type));
1366 /* Check target support */
1367 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1368 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1369 tree ctype = itype;
1370 tree vecctype = vecitype;
1371 if (orig_code == MINUS_EXPR
1372 && TYPE_UNSIGNED (itype)
1373 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1375 /* Subtraction is special, even if half_type is unsigned and no matter
1376 whether type is signed or unsigned, if type is wider than itype,
1377 we need to sign-extend from the widening operation result to the
1378 result type.
1379 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1380 itype unsigned short and type either int or unsigned int.
1381 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1382 (unsigned short) 0xffff, but for type int we want the result -1
1383 and for type unsigned int 0xffffffff rather than 0xffff. */
1384 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1385 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1388 enum tree_code dummy_code;
1389 int dummy_int;
1390 auto_vec<tree> dummy_vec;
1391 if (!vectype
1392 || !vecitype
1393 || !vecctype
1394 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1395 vecitype, vectype,
1396 &dummy_code, &dummy_code,
1397 &dummy_int, &dummy_vec))
1398 return NULL;
1400 *type_out = get_vectype_for_scalar_type (vinfo, type);
1401 if (!*type_out)
1402 return NULL;
1404 tree oprnd[2];
1405 vect_convert_inputs (vinfo, last_stmt_info,
1406 2, oprnd, half_type, unprom, vectype);
1408 tree var = vect_recog_temp_ssa_var (itype, NULL);
1409 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1410 oprnd[0], oprnd[1]);
1412 if (vecctype != vecitype)
1413 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1414 pattern_stmt, vecitype);
1416 return vect_convert_output (vinfo, last_stmt_info,
1417 type, pattern_stmt, vecctype);
1420 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1421 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1423 static gimple *
1424 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1425 tree *type_out)
1427 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1428 MULT_EXPR, WIDEN_MULT_EXPR, false,
1429 "vect_recog_widen_mult_pattern");
1432 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1433 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1435 static gimple *
1436 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1437 tree *type_out)
1439 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1440 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1441 "vect_recog_widen_plus_pattern");
1444 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1445 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1446 static gimple *
1447 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1448 tree *type_out)
1450 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1451 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1452 "vect_recog_widen_minus_pattern");
1455 /* Function vect_recog_popcount_pattern
1457 Try to find the following pattern:
1459 UTYPE1 A;
1460 TYPE1 B;
1461 UTYPE2 temp_in;
1462 TYPE3 temp_out;
1463 temp_in = (UTYPE2)A;
1465 temp_out = __builtin_popcount{,l,ll} (temp_in);
1466 B = (TYPE1) temp_out;
1468 UTYPE2 may or may not be equal to TYPE3;
1469 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount,
1470 but UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
1472 Input:
1474 * STMT_VINFO: The stmt from which the pattern search begins.
1475 here it starts with B = (TYPE1) temp_out;
1477 Output:
1479 * TYPE_OUT: The vector type of the output of this pattern.
1481 * Return value: A new stmt that will be used to replace the sequence of
1482 stmts that constitute the pattern. In this case it will be:
1483 B = .POPCOUNT (A);
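/* Editor's note: an example that matches, not part of the original comment,
   assuming 64-bit unsigned long long elements:

     unsigned long long a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = __builtin_popcountll (a[i]);

   The implicit widening of the int result back to 64 bits is the
   conversion the recognizer starts from; the call becomes .POPCOUNT (a[i])
   when the target has a vector popcount for the chosen mode.  */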
1486 static gimple *
1487 vect_recog_popcount_pattern (vec_info *vinfo,
1488 stmt_vec_info stmt_vinfo, tree *type_out)
1490 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1491 gimple *popcount_stmt, *pattern_stmt;
1492 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1493 auto_vec<tree> vargs;
1495 /* Find B = (TYPE1) temp_out. */
1496 if (!last_stmt)
1497 return NULL;
1498 tree_code code = gimple_assign_rhs_code (last_stmt);
1499 if (!CONVERT_EXPR_CODE_P (code))
1500 return NULL;
1502 lhs_oprnd = gimple_assign_lhs (last_stmt);
1503 lhs_type = TREE_TYPE (lhs_oprnd);
1504 if (!INTEGRAL_TYPE_P (lhs_type))
1505 return NULL;
1507 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1508 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1509 || !has_single_use (rhs_oprnd))
1510 return NULL;
1511 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1513 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1514 if (!is_gimple_call (popcount_stmt))
1515 return NULL;
1516 switch (gimple_call_combined_fn (popcount_stmt))
1518 CASE_CFN_POPCOUNT:
1519 break;
1520 default:
1521 return NULL;
1524 if (gimple_call_num_args (popcount_stmt) != 1)
1525 return NULL;
1527 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1528 vect_unpromoted_value unprom_diff;
1529 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1530 &unprom_diff);
1532 if (!rhs_origin)
1533 return NULL;
1535 /* Input and output of .POPCOUNT should be same-precision integer.
1536 Also A should be unsigned or same precision as temp_in,
1537 otherwise there would be sign_extend from A to temp_in. */
1538 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1539 || (!TYPE_UNSIGNED (unprom_diff.type)
1540 && (TYPE_PRECISION (unprom_diff.type)
1541 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1542 return NULL;
1543 vargs.safe_push (unprom_diff.op);
1545 vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
1546 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1547 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1548 if (!vec_type
1549 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1550 OPTIMIZE_FOR_SPEED))
1551 return NULL;
1553 /* Create B = .POPCOUNT (A). */
1554 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1555 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1556 gimple_call_set_lhs (pattern_stmt, new_var);
1557 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1558 *type_out = vec_type;
1560 if (dump_enabled_p ())
1561 dump_printf_loc (MSG_NOTE, vect_location,
1562 "created pattern stmt: %G", pattern_stmt);
1563 return pattern_stmt;
1566 /* Function vect_recog_pow_pattern
1568 Try to find the following pattern:
1570 x = POW (y, N);
1572 with POW being one of pow, powf, powi, powif and N being
1573 either 2 or 0.5.
1575 Input:
1577 * STMT_VINFO: The stmt from which the pattern search begins.
1579 Output:
1581 * TYPE_OUT: The type of the output of this pattern.
1583 * Return value: A new stmt that will be used to replace the sequence of
1584 stmts that constitute the pattern. In this case it will be:
1585 x = x * x
1587 x = sqrt (x)
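/* Editor's note: illustrative inputs, not part of the original comment:

     a[i] = pow (b[i], 2.0);      recognized as:  a[i] = b[i] * b[i]
     c[i] = pow (d[i], 0.5);      recognized as:  c[i] = sqrt (d[i]),
                                  provided the target supports a vector
                                  square root (IFN_SQRT)

   so the vectorized loop no longer contains a call.  */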
1590 static gimple *
1591 vect_recog_pow_pattern (vec_info *vinfo,
1592 stmt_vec_info stmt_vinfo, tree *type_out)
1594 gimple *last_stmt = stmt_vinfo->stmt;
1595 tree base, exp;
1596 gimple *stmt;
1597 tree var;
1599 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1600 return NULL;
1602 switch (gimple_call_combined_fn (last_stmt))
1604 CASE_CFN_POW:
1605 CASE_CFN_POWI:
1606 break;
1608 default:
1609 return NULL;
1612 base = gimple_call_arg (last_stmt, 0);
1613 exp = gimple_call_arg (last_stmt, 1);
1614 if (TREE_CODE (exp) != REAL_CST
1615 && TREE_CODE (exp) != INTEGER_CST)
1617 if (flag_unsafe_math_optimizations
1618 && TREE_CODE (base) == REAL_CST
1619 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1621 combined_fn log_cfn;
1622 built_in_function exp_bfn;
1623 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1625 case BUILT_IN_POW:
1626 log_cfn = CFN_BUILT_IN_LOG;
1627 exp_bfn = BUILT_IN_EXP;
1628 break;
1629 case BUILT_IN_POWF:
1630 log_cfn = CFN_BUILT_IN_LOGF;
1631 exp_bfn = BUILT_IN_EXPF;
1632 break;
1633 case BUILT_IN_POWL:
1634 log_cfn = CFN_BUILT_IN_LOGL;
1635 exp_bfn = BUILT_IN_EXPL;
1636 break;
1637 default:
1638 return NULL;
1640 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1641 tree exp_decl = builtin_decl_implicit (exp_bfn);
1642 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1643 does that, but if C is a power of 2, we want to use
1644 exp2 (log2 (C) * x) in the non-vectorized version, but for
1645 vectorization we don't have vectorized exp2. */
1646 if (logc
1647 && TREE_CODE (logc) == REAL_CST
1648 && exp_decl
1649 && lookup_attribute ("omp declare simd",
1650 DECL_ATTRIBUTES (exp_decl)))
1652 cgraph_node *node = cgraph_node::get_create (exp_decl);
1653 if (node->simd_clones == NULL)
1655 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1656 || node->definition)
1657 return NULL;
1658 expand_simd_clones (node);
1659 if (node->simd_clones == NULL)
1660 return NULL;
1662 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1663 if (!*type_out)
1664 return NULL;
1665 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1666 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1667 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1668 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1669 g = gimple_build_call (exp_decl, 1, def);
1670 gimple_call_set_lhs (g, res);
1671 return g;
1675 return NULL;
1678 /* We now have a pow or powi builtin function call with a constant
1679 exponent. */
1681 /* Catch squaring. */
1682 if ((tree_fits_shwi_p (exp)
1683 && tree_to_shwi (exp) == 2)
1684 || (TREE_CODE (exp) == REAL_CST
1685 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1687 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1688 TREE_TYPE (base), type_out))
1689 return NULL;
1691 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1692 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1693 return stmt;
1696 /* Catch square root. */
1697 if (TREE_CODE (exp) == REAL_CST
1698 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1700 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1701 if (*type_out
1702 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1703 OPTIMIZE_FOR_SPEED))
1705 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1706 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1707 gimple_call_set_lhs (stmt, var);
1708 gimple_call_set_nothrow (stmt, true);
1709 return stmt;
1713 return NULL;
1717 /* Function vect_recog_widen_sum_pattern
1719 Try to find the following pattern:
1721 type x_t;
1722 TYPE x_T, sum = init;
1723 loop:
1724 sum_0 = phi <init, sum_1>
1725 S1 x_t = *p;
1726 S2 x_T = (TYPE) x_t;
1727 S3 sum_1 = x_T + sum_0;
1729 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1730 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1731 a special case of a reduction computation.
1733 Input:
1735 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1736 when this function is called with S3, the pattern {S2,S3} will be detected.
1738 Output:
1740 * TYPE_OUT: The type of the output of this pattern.
1742 * Return value: A new stmt that will be used to replace the sequence of
1743 stmts that constitute the pattern. In this case it will be:
1744 WIDEN_SUM <x_t, sum_0>
1746 Note: The widening-sum idiom is a widening reduction pattern that is
1747 vectorized without preserving all the intermediate results. It
1748 produces only N/2 (widened) results (by summing up pairs of
1749 intermediate results) rather than all N results. Therefore, we
1750 cannot allow this pattern when we want to get all the results and in
1751 the correct order (as is the case when this computation is in an
1752 inner-loop nested in an outer-loop that is being vectorized). */
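/* An illustrative instantiation (types assumed): with type = unsigned char
   and TYPE = unsigned int, the loop

     unsigned int sum = 0;
     for (int i = 0; i < n; i++)
       sum += p[i];   // p has type unsigned char *

   contains S2 (the char-to-unsigned-int promotion) and S3 (the
   accumulation), and the pair is replaced by WIDEN_SUM <x_t, sum_0>.  */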
1754 static gimple *
1755 vect_recog_widen_sum_pattern (vec_info *vinfo,
1756 stmt_vec_info stmt_vinfo, tree *type_out)
1758 gimple *last_stmt = stmt_vinfo->stmt;
1759 tree oprnd0, oprnd1;
1760 tree type;
1761 gimple *pattern_stmt;
1762 tree var;
1764 /* Look for the following pattern
1765 DX = (TYPE) X;
1766 sum_1 = DX + sum_0;
1767 In which DX is at least double the size of X, and sum_1 has been
1768 recognized as a reduction variable.
1771 /* Starting from LAST_STMT, follow the defs of its uses in search
1772 of the above pattern. */
1774 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1775 &oprnd0, &oprnd1))
1776 return NULL;
1778 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1780 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1781 we know that oprnd1 is the reduction variable (defined by a loop-header
1782 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1783 Left to check that oprnd0 is defined by a cast from type 'type' to type
1784 'TYPE'. */
1786 vect_unpromoted_value unprom0;
1787 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1788 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1789 return NULL;
1791 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1793 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1794 unprom0.type, type_out))
1795 return NULL;
1797 var = vect_recog_temp_ssa_var (type, NULL);
1798 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1800 return pattern_stmt;
1803 /* Recognize cases in which an operation is performed in one type WTYPE
1804 but could be done more efficiently in a narrower type NTYPE. For example,
1805 if we have:
1807 ATYPE a; // narrower than NTYPE
1808 BTYPE b; // narrower than NTYPE
1809 WTYPE aw = (WTYPE) a;
1810 WTYPE bw = (WTYPE) b;
1811 WTYPE res = aw + bw; // only uses of aw and bw
1813 then it would be more efficient to do:
1815 NTYPE an = (NTYPE) a;
1816 NTYPE bn = (NTYPE) b;
1817 NTYPE resn = an + bn;
1818 WTYPE res = (WTYPE) resn;
1820 Other situations include things like:
1822 ATYPE a; // NTYPE or narrower
1823 WTYPE aw = (WTYPE) a;
1824 WTYPE res = aw + b;
1826 when only "(NTYPE) res" is significant. In that case it's more efficient
1827 to truncate "b" and do the operation on NTYPE instead:
1829 NTYPE an = (NTYPE) a;
1830 NTYPE bn = (NTYPE) b; // truncation
1831 NTYPE resn = an + bn;
1832 WTYPE res = (WTYPE) resn;
1834 All users of "res" should then use "resn" instead, making the final
1835 statement dead (not marked as relevant). The final statement is still
1836 needed to maintain the type correctness of the IR.
1838 vect_determine_precisions has already determined the minimum
1839 precision of the operation and the minimum precision required
1840 by users of the result. */
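/* A concrete, illustrative instance of the first case above: with
   ATYPE = BTYPE = unsigned char, WTYPE = int and NTYPE = unsigned short,

     int res = (int) a + (int) b;   // only the low 16 bits of res matter

   becomes

     unsigned short an = (unsigned short) a;
     unsigned short bn = (unsigned short) b;
     unsigned short resn = an + bn;
     int res = (int) resn;

   so each vector operation processes twice as many elements.  */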
1842 static gimple *
1843 vect_recog_over_widening_pattern (vec_info *vinfo,
1844 stmt_vec_info last_stmt_info, tree *type_out)
1846 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1847 if (!last_stmt)
1848 return NULL;
1850 /* See whether we have found that this operation can be done on a
1851 narrower type without changing its semantics. */
1852 unsigned int new_precision = last_stmt_info->operation_precision;
1853 if (!new_precision)
1854 return NULL;
1856 tree lhs = gimple_assign_lhs (last_stmt);
1857 tree type = TREE_TYPE (lhs);
1858 tree_code code = gimple_assign_rhs_code (last_stmt);
1860 /* Punt for reductions where we don't handle the type conversions. */
1861 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1862 return NULL;
1864 /* Keep the first operand of a COND_EXPR as-is: only the other two
1865 operands are interesting. */
1866 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1868 /* Check the operands. */
1869 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1870 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1871 unprom.quick_grow (nops);
1872 unsigned int min_precision = 0;
1873 bool single_use_p = false;
1874 for (unsigned int i = 0; i < nops; ++i)
1876 tree op = gimple_op (last_stmt, first_op + i);
1877 if (TREE_CODE (op) == INTEGER_CST)
1878 unprom[i].set_op (op, vect_constant_def);
1879 else if (TREE_CODE (op) == SSA_NAME)
1881 bool op_single_use_p = true;
1882 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1883 &op_single_use_p))
1884 return NULL;
1885 /* If:
1887 (1) N bits of the result are needed;
1888 (2) all inputs are widened from M<N bits; and
1889 (3) one operand OP is a single-use SSA name
1891 we can shift the M->N widening from OP to the output
1892 without changing the number or type of extensions involved.
1893 This then reduces the number of copies of STMT_INFO.
1895 If instead of (3) more than one operand is a single-use SSA name,
1896 shifting the extension to the output is even more of a win.
1898 If instead:
1900 (1) N bits of the result are needed;
1901 (2) one operand OP2 is widened from M2<N bits;
1902 (3) another operand OP1 is widened from M1<M2 bits; and
1903 (4) both OP1 and OP2 are single-use
1905 the choice is between:
1907 (a) truncating OP2 to M1, doing the operation on M1,
1908 and then widening the result to N
1910 (b) widening OP1 to M2, doing the operation on M2, and then
1911 widening the result to N
1913 Both shift the M2->N widening of the inputs to the output.
1914 (a) additionally shifts the M1->M2 widening to the output;
1915 it requires fewer copies of STMT_INFO but requires an extra
1916 M2->M1 truncation.
1918 Which is better will depend on the complexity and cost of
1919 STMT_INFO, which is hard to predict at this stage. However,
1920 a clear tie-breaker in favor of (b) is the fact that the
1921 truncation in (a) increases the length of the operation chain.
1923 If instead of (4) only one of OP1 or OP2 is single-use,
1924 (b) is still a win over doing the operation in N bits:
1925 it still shifts the M2->N widening on the single-use operand
1926 to the output and reduces the number of STMT_INFO copies.
1928 If neither operand is single-use then operating on fewer than
1929 N bits might lead to more extensions overall. Whether it does
1930 or not depends on global information about the vectorization
1931 region, and whether that's a good trade-off would again
1932 depend on the complexity and cost of the statements involved,
1933 as well as things like register pressure that are not normally
1934 modelled at this stage. We therefore ignore these cases
1935 and just optimize the clear single-use wins above.
1937 Thus we take the maximum precision of the unpromoted operands
1938 and record whether any operand is single-use. */
1939 if (unprom[i].dt == vect_internal_def)
1941 min_precision = MAX (min_precision,
1942 TYPE_PRECISION (unprom[i].type));
1943 single_use_p |= op_single_use_p;
1946 else
1947 return NULL;
1950 /* Although the operation could be done in operation_precision, we have
1951 to balance that against introducing extra truncations or extensions.
1952 Calculate the minimum precision that can be handled efficiently.
1954 The loop above determined that the operation could be handled
1955 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1956 extension from the inputs to the output without introducing more
1957 instructions, and would reduce the number of instructions required
1958 for STMT_INFO itself.
1960 vect_determine_precisions has also determined that the result only
1961 needs min_output_precision bits. Truncating by a factor of N times
1962 requires a tree of N - 1 instructions, so if TYPE is N times wider
1963 than min_output_precision, doing the operation in TYPE and truncating
1964 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1965 In contrast:
1967 - truncating the input to a unary operation and doing the operation
1968 in the new type requires at most N - 1 + 1 = N instructions per
1969 output vector
1971 - doing the same for a binary operation requires at most
1972 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1974 Both unary and binary operations require fewer instructions than
1975 this if the operands were extended from a suitable truncated form.
1976 Thus there is usually nothing to lose by doing operations in
1977 min_output_precision bits, but there can be something to gain. */
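/* Worked example of the counts above (illustrative): with N = 2, operating
   in TYPE and truncating costs 2 + 1 = 3 instructions per output vector,
   truncating the inputs of a binary operation first costs
   (2 - 1) * 2 + 1 = 3, and a unary operation costs 1 + 1 = 2, so narrowing
   never loses here and can win.  */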
1978 if (!single_use_p)
1979 min_precision = last_stmt_info->min_output_precision;
1980 else
1981 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1983 /* Apply the minimum efficient precision we just calculated. */
1984 if (new_precision < min_precision)
1985 new_precision = min_precision;
1986 new_precision = vect_element_precision (new_precision);
1987 if (new_precision >= TYPE_PRECISION (type))
1988 return NULL;
1990 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1992 *type_out = get_vectype_for_scalar_type (vinfo, type);
1993 if (!*type_out)
1994 return NULL;
1996 /* We've found a viable pattern. Get the new type of the operation. */
1997 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1998 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2000 /* If we're truncating an operation, we need to make sure that we
2001 don't introduce new undefined overflow. The codes tested here are
2002 a subset of those accepted by vect_truncatable_operation_p. */
2003 tree op_type = new_type;
2004 if (TYPE_OVERFLOW_UNDEFINED (new_type)
2005 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2006 op_type = build_nonstandard_integer_type (new_precision, true);
2008 /* We specifically don't check here whether the target supports the
2009 new operation, since it might be something that a later pattern
2010 wants to rewrite anyway. If targets have a minimum element size
2011 for some optabs, we should pattern-match smaller ops to larger ops
2012 where beneficial. */
2013 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2014 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2015 if (!new_vectype || !op_vectype)
2016 return NULL;
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2020 type, new_type);
2022 /* Calculate the rhs operands for an operation on OP_TYPE. */
2023 tree ops[3] = {};
2024 for (unsigned int i = 1; i < first_op; ++i)
2025 ops[i - 1] = gimple_op (last_stmt, i);
2026 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2027 op_type, &unprom[0], op_vectype);
2029 /* Use the operation to produce a result of type OP_TYPE. */
2030 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2031 gimple *pattern_stmt = gimple_build_assign (new_var, code,
2032 ops[0], ops[1], ops[2]);
2033 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2035 if (dump_enabled_p ())
2036 dump_printf_loc (MSG_NOTE, vect_location,
2037 "created pattern stmt: %G", pattern_stmt);
2039 /* Convert back to the original signedness, if OP_TYPE is different
2040 from NEW_TYPE. */
2041 if (op_type != new_type)
2042 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2043 pattern_stmt, op_vectype);
2045 /* Promote the result to the original type. */
2046 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2047 pattern_stmt, new_vectype);
2049 return pattern_stmt;
2052 /* Recognize the following patterns:
2054 ATYPE a; // narrower than TYPE
2055 BTYPE b; // narrower than TYPE
2057 1) Multiply high with scaling
2058 TYPE res = ((TYPE) a * (TYPE) b) >> c;
2059 Here, c is bitsize (TYPE) / 2 - 1.
2061 2) ... or also with rounding
2062 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
2063 Here, d is bitsize (TYPE) / 2 - 2.
2065 3) Normal multiply high
2066 TYPE res = ((TYPE) a * (TYPE) b) >> e;
2067 Here, e is bitsize (TYPE) / 2.
2069 where only the bottom half of res is used. */
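/* Illustrative instantiation (types assumed): with ATYPE = BTYPE = int16_t
   and TYPE = int32_t,

     int32_t res = ((int32_t) a * (int32_t) b) >> 15;   // pattern 1), c = 15

   where only the low 16 bits of res are used maps to IFN_MULHS on int16_t
   elements; a shift by 16 (pattern 3)) maps to IFN_MULH, and the rounding
   form 2) maps to IFN_MULHRS.  */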
2071 static gimple *
2072 vect_recog_mulhs_pattern (vec_info *vinfo,
2073 stmt_vec_info last_stmt_info, tree *type_out)
2075 /* Check for a right shift. */
2076 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2077 if (!last_stmt
2078 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2079 return NULL;
2081 /* Check that the shift result is wider than the users of the
2082 result need (i.e. that narrowing would be a natural choice). */
2083 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2084 unsigned int target_precision
2085 = vect_element_precision (last_stmt_info->min_output_precision);
2086 if (!INTEGRAL_TYPE_P (lhs_type)
2087 || target_precision >= TYPE_PRECISION (lhs_type))
2088 return NULL;
2090 /* Look through any change in sign on the outer shift input. */
2091 vect_unpromoted_value unprom_rshift_input;
2092 tree rshift_input = vect_look_through_possible_promotion
2093 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2094 if (!rshift_input
2095 || TYPE_PRECISION (TREE_TYPE (rshift_input))
2096 != TYPE_PRECISION (lhs_type))
2097 return NULL;
2099 /* Get the definition of the shift input. */
2100 stmt_vec_info rshift_input_stmt_info
2101 = vect_get_internal_def (vinfo, rshift_input);
2102 if (!rshift_input_stmt_info)
2103 return NULL;
2104 gassign *rshift_input_stmt
2105 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2106 if (!rshift_input_stmt)
2107 return NULL;
2109 stmt_vec_info mulh_stmt_info;
2110 tree scale_term;
2111 bool rounding_p = false;
2113 /* Check for the presence of the rounding term. */
2114 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2116 /* Check that the outer shift was by 1. */
2117 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2118 return NULL;
2120 /* Check that the second operand of the PLUS_EXPR is 1. */
2121 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2122 return NULL;
2124 /* Look through any change in sign on the addition input. */
2125 vect_unpromoted_value unprom_plus_input;
2126 tree plus_input = vect_look_through_possible_promotion
2127 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2128 if (!plus_input
2129 || TYPE_PRECISION (TREE_TYPE (plus_input))
2130 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2131 return NULL;
2133 /* Get the definition of the multiply-high-scale part. */
2134 stmt_vec_info plus_input_stmt_info
2135 = vect_get_internal_def (vinfo, plus_input);
2136 if (!plus_input_stmt_info)
2137 return NULL;
2138 gassign *plus_input_stmt
2139 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2140 if (!plus_input_stmt
2141 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2142 return NULL;
2144 /* Look through any change in sign on the scaling input. */
2145 vect_unpromoted_value unprom_scale_input;
2146 tree scale_input = vect_look_through_possible_promotion
2147 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2148 if (!scale_input
2149 || TYPE_PRECISION (TREE_TYPE (scale_input))
2150 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2151 return NULL;
2153 /* Get the definition of the multiply-high part. */
2154 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2155 if (!mulh_stmt_info)
2156 return NULL;
2158 /* Get the scaling term. */
2159 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2160 rounding_p = true;
2162 else
2164 mulh_stmt_info = rshift_input_stmt_info;
2165 scale_term = gimple_assign_rhs2 (last_stmt);
2168 /* Check that the scaling factor is constant. */
2169 if (TREE_CODE (scale_term) != INTEGER_CST)
2170 return NULL;
2172 /* Check whether the scaling input term can be seen as two widened
2173 inputs multiplied together. */
2174 vect_unpromoted_value unprom_mult[2];
2175 tree new_type;
2176 unsigned int nops
2177 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2178 false, 2, unprom_mult, &new_type);
2179 if (nops != 2)
2180 return NULL;
2182 /* Adjust output precision. */
2183 if (TYPE_PRECISION (new_type) < target_precision)
2184 new_type = build_nonstandard_integer_type
2185 (target_precision, TYPE_UNSIGNED (new_type));
2187 unsigned mult_precision = TYPE_PRECISION (new_type);
2188 internal_fn ifn;
2189 /* Check that the scaling factor is the expected one. Instead of
2190 target_precision, we should use the precision that we actually
2191 use for the internal function. */
2192 if (rounding_p)
2194 /* Check pattern 2). */
2195 if (wi::to_widest (scale_term) + mult_precision + 2
2196 != TYPE_PRECISION (lhs_type))
2197 return NULL;
2199 ifn = IFN_MULHRS;
2201 else
2203 /* Check for pattern 1). */
2204 if (wi::to_widest (scale_term) + mult_precision + 1
2205 == TYPE_PRECISION (lhs_type))
2206 ifn = IFN_MULHS;
2207 /* Check for pattern 3). */
2208 else if (wi::to_widest (scale_term) + mult_precision
2209 == TYPE_PRECISION (lhs_type))
2210 ifn = IFN_MULH;
2211 else
2212 return NULL;
2215 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2217 /* Check for target support. */
2218 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2219 if (!new_vectype
2220 || !direct_internal_fn_supported_p
2221 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2222 return NULL;
2224 /* The IR requires a valid vector type for the cast result, even though
2225 it's likely to be discarded. */
2226 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2227 if (!*type_out)
2228 return NULL;
2230 /* Generate the IFN_MULHRS call. */
2231 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2232 tree new_ops[2];
2233 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2234 unprom_mult, new_vectype);
2235 gcall *mulhrs_stmt
2236 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2237 gimple_call_set_lhs (mulhrs_stmt, new_var);
2238 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2240 if (dump_enabled_p ())
2241 dump_printf_loc (MSG_NOTE, vect_location,
2242 "created pattern stmt: %G", mulhrs_stmt);
2244 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2245 mulhrs_stmt, new_vectype);
2248 /* Recognize the patterns:
2250 ATYPE a; // narrower than TYPE
2251 BTYPE b; // narrower than TYPE
2252 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2253 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2255 where only the bottom half of avg is used. Try to transform them into:
2257 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2258 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2260 followed by:
2262 TYPE avg = (TYPE) avg';
2264 where NTYPE is no wider than half of TYPE. Since only the bottom half
2265 of avg is used, all or part of the cast of avg' should become redundant.
2267 If there is no target support available, generate code to distribute rshift
2268 over plus and add a carry. */
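/* Illustrative instantiation (types assumed): with a and b of type
   unsigned char and TYPE = int,

     int avg = ((int) a + (int) b + 1) >> 1;   // only low 8 bits of avg used

   becomes

     unsigned char tmp = .AVG_CEIL (a, b);
     int avg = (int) tmp;

   and the variant without "+ 1" uses .AVG_FLOOR instead.  */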
2270 static gimple *
2271 vect_recog_average_pattern (vec_info *vinfo,
2272 stmt_vec_info last_stmt_info, tree *type_out)
2274 /* Check for a shift right by one bit. */
2275 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2276 if (!last_stmt
2277 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2278 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2279 return NULL;
2281 /* Check that the shift result is wider than the users of the
2282 result need (i.e. that narrowing would be a natural choice). */
2283 tree lhs = gimple_assign_lhs (last_stmt);
2284 tree type = TREE_TYPE (lhs);
2285 unsigned int target_precision
2286 = vect_element_precision (last_stmt_info->min_output_precision);
2287 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2288 return NULL;
2290 /* Look through any change in sign on the shift input. */
2291 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2292 vect_unpromoted_value unprom_plus;
2293 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2294 &unprom_plus);
2295 if (!rshift_rhs
2296 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2297 return NULL;
2299 /* Get the definition of the shift input. */
2300 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2301 if (!plus_stmt_info)
2302 return NULL;
2304 /* Check whether the shift input can be seen as a tree of additions on
2305 2 or 3 widened inputs.
2307 Note that the pattern should be a win even if the result of one or
2308 more additions is reused elsewhere: if the pattern matches, we'd be
2309 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2310 internal_fn ifn = IFN_AVG_FLOOR;
2311 vect_unpromoted_value unprom[3];
2312 tree new_type;
2313 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2314 WIDEN_PLUS_EXPR, false, 3,
2315 unprom, &new_type);
2316 if (nops == 0)
2317 return NULL;
2318 if (nops == 3)
2320 /* Check that one operand is 1. */
2321 unsigned int i;
2322 for (i = 0; i < 3; ++i)
2323 if (integer_onep (unprom[i].op))
2324 break;
2325 if (i == 3)
2326 return NULL;
2327 /* Throw away the 1 operand and keep the other two. */
2328 if (i < 2)
2329 unprom[i] = unprom[2];
2330 ifn = IFN_AVG_CEIL;
2333 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2335 /* We know that:
2337 (a) the operation can be viewed as:
2339 TYPE widened0 = (TYPE) UNPROM[0];
2340 TYPE widened1 = (TYPE) UNPROM[1];
2341 TYPE tmp1 = widened0 + widened1 {+ 1};
2342 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2344 (b) the first two statements are equivalent to:
2346 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2347 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2349 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2350 where sensible;
2352 (d) all the operations can be performed correctly at twice the width of
2353 NEW_TYPE, due to the nature of the average operation; and
2355 (e) users of the result of the right shift need only TARGET_PRECISION
2356 bits, where TARGET_PRECISION is no more than half of TYPE's
2357 precision.
2359 Under these circumstances, the only situation in which NEW_TYPE
2360 could be narrower than TARGET_PRECISION is if widened0, widened1
2361 and an addition result are all used more than once. Thus we can
2362 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2363 as "free", whereas widening the result of the average instruction
2364 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2365 therefore better not to go narrower than TARGET_PRECISION. */
2366 if (TYPE_PRECISION (new_type) < target_precision)
2367 new_type = build_nonstandard_integer_type (target_precision,
2368 TYPE_UNSIGNED (new_type));
2370 /* Check for target support. */
2371 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2372 if (!new_vectype)
2373 return NULL;
2375 bool fallback_p = false;
2377 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2379 else if (TYPE_UNSIGNED (new_type)
2380 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2381 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2382 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2383 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2384 fallback_p = true;
2385 else
2386 return NULL;
2388 /* The IR requires a valid vector type for the cast result, even though
2389 it's likely to be discarded. */
2390 *type_out = get_vectype_for_scalar_type (vinfo, type);
2391 if (!*type_out)
2392 return NULL;
2394 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2395 tree new_ops[2];
2396 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2397 unprom, new_vectype);
2399 if (fallback_p)
2401 /* As a fallback, generate code for the following sequence:
2403 shifted_op0 = new_ops[0] >> 1;
2404 shifted_op1 = new_ops[1] >> 1;
2405 sum_of_shifted = shifted_op0 + shifted_op1;
2406 unmasked_carry = new_ops[0] and/or new_ops[1];
2407 carry = unmasked_carry & 1;
2408 new_var = sum_of_shifted + carry;
2411 tree one_cst = build_one_cst (new_type);
2412 gassign *g;
2414 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2415 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2416 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2418 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2419 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2420 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2422 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2423 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2424 shifted_op0, shifted_op1);
2425 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2427 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2428 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2429 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2430 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2432 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2433 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2434 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2436 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2437 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2440 /* Generate the IFN_AVG* call. */
2441 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2442 new_ops[1]);
2443 gimple_call_set_lhs (average_stmt, new_var);
2444 gimple_set_location (average_stmt, gimple_location (last_stmt));
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_NOTE, vect_location,
2448 "created pattern stmt: %G", average_stmt);
2450 return vect_convert_output (vinfo, last_stmt_info,
2451 type, average_stmt, new_vectype);
2454 /* Recognize cases in which the input to a cast is wider than its
2455 output, and the input is fed by a widening operation. Fold this
2456 by removing the unnecessary intermediate widening. E.g.:
2458 unsigned char a;
2459 unsigned int b = (unsigned int) a;
2460 unsigned short c = (unsigned short) b;
-->
2464 unsigned short c = (unsigned short) a;
2466 Although this is rare in input IR, it is an expected side-effect
2467 of the over-widening pattern above.
2469 This is beneficial also for integer-to-float conversions, if the
2470 widened integer has more bits than the float, and if the unwidened
2471 input doesn't. */
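/* An illustrative example of the integer-to-float case (types assumed):

     unsigned short a;
     unsigned long long b = (unsigned long long) a;
     float c = (float) b;

   Here the widened b has more bits than float while the unwidened a does
   not, so c can be computed directly as (float) a.  */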
2473 static gimple *
2474 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2475 stmt_vec_info last_stmt_info, tree *type_out)
2477 /* Check for a cast, including an integer-to-float conversion. */
2478 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2479 if (!last_stmt)
2480 return NULL;
2481 tree_code code = gimple_assign_rhs_code (last_stmt);
2482 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2483 return NULL;
2485 /* Make sure that the rhs is a scalar with a natural bitsize. */
2486 tree lhs = gimple_assign_lhs (last_stmt);
2487 if (!lhs)
2488 return NULL;
2489 tree lhs_type = TREE_TYPE (lhs);
2490 scalar_mode lhs_mode;
2491 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2492 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2493 return NULL;
2495 /* Check for a narrowing operation (from a vector point of view). */
2496 tree rhs = gimple_assign_rhs1 (last_stmt);
2497 tree rhs_type = TREE_TYPE (rhs);
2498 if (!INTEGRAL_TYPE_P (rhs_type)
2499 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2500 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2501 return NULL;
2503 /* Try to find an unpromoted input. */
2504 vect_unpromoted_value unprom;
2505 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2506 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2507 return NULL;
2509 /* If the bits above RHS_TYPE matter, make sure that they're the
2510 same when extending from UNPROM as they are when extending from RHS. */
2511 if (!INTEGRAL_TYPE_P (lhs_type)
2512 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2513 return NULL;
2515 /* We can get the same result by casting UNPROM directly, to avoid
2516 the unnecessary widening and narrowing. */
2517 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2519 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2520 if (!*type_out)
2521 return NULL;
2523 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2524 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2525 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2527 return pattern_stmt;
2530 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2531 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2533 static gimple *
2534 vect_recog_widen_shift_pattern (vec_info *vinfo,
2535 stmt_vec_info last_stmt_info, tree *type_out)
2537 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2538 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2539 "vect_recog_widen_shift_pattern");
2542 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2544 type a_t, b_t, c_t;
2546 S0 a_t = b_t r<< c_t;
2548 Input/Output:
2550 * STMT_VINFO: The stmt from which the pattern search begins,
2551 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2552 with a sequence:
2554 S1 d_t = -c_t;
2555 S2 e_t = d_t & (B - 1);
2556 S3 f_t = b_t << c_t;
2557 S4 g_t = b_t >> e_t;
2558 S0 a_t = f_t | g_t;
2560 where B is the element bitsize of type.
2562 Output:
2564 * TYPE_OUT: The type of the output of this pattern.
2566 * Return value: A new stmt that will be used to replace the rotate
2567 S0 stmt. */
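/* For illustration, with a 32-bit type (B = 32) and no vector rotate
   support, a_t = b_t r<< c_t expands to

     d_t = -c_t;
     e_t = d_t & 31;
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;

   which needs only vector shift, negate, AND and IOR support.  */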
2569 static gimple *
2570 vect_recog_rotate_pattern (vec_info *vinfo,
2571 stmt_vec_info stmt_vinfo, tree *type_out)
2573 gimple *last_stmt = stmt_vinfo->stmt;
2574 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2575 gimple *pattern_stmt, *def_stmt;
2576 enum tree_code rhs_code;
2577 enum vect_def_type dt;
2578 optab optab1, optab2;
2579 edge ext_def = NULL;
2580 bool bswap16_p = false;
2582 if (is_gimple_assign (last_stmt))
2584 rhs_code = gimple_assign_rhs_code (last_stmt);
2585 switch (rhs_code)
2587 case LROTATE_EXPR:
2588 case RROTATE_EXPR:
2589 break;
2590 default:
2591 return NULL;
2594 lhs = gimple_assign_lhs (last_stmt);
2595 oprnd0 = gimple_assign_rhs1 (last_stmt);
2596 type = TREE_TYPE (oprnd0);
2597 oprnd1 = gimple_assign_rhs2 (last_stmt);
2599 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2601 /* __builtin_bswap16 (x) is another form of x r>> 8.
2602 The vectorizer has bswap support, but only if the argument isn't
2603 promoted. */
2604 lhs = gimple_call_lhs (last_stmt);
2605 oprnd0 = gimple_call_arg (last_stmt, 0);
2606 type = TREE_TYPE (oprnd0);
2607 if (!lhs
2608 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2609 || TYPE_PRECISION (type) <= 16
2610 || TREE_CODE (oprnd0) != SSA_NAME
2611 || BITS_PER_UNIT != 8
2612 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2613 return NULL;
2615 stmt_vec_info def_stmt_info;
2616 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2617 return NULL;
2619 if (dt != vect_internal_def)
2620 return NULL;
2622 if (gimple_assign_cast_p (def_stmt))
2624 def = gimple_assign_rhs1 (def_stmt);
2625 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2626 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2627 oprnd0 = def;
2630 type = TREE_TYPE (lhs);
2631 vectype = get_vectype_for_scalar_type (vinfo, type);
2632 if (vectype == NULL_TREE)
2633 return NULL;
2635 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2637 /* The encoding uses one stepped pattern for each byte in the
2638 16-bit word. */
2639 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2640 for (unsigned i = 0; i < 3; ++i)
2641 for (unsigned j = 0; j < 2; ++j)
2642 elts.quick_push ((i + 1) * 2 - j - 1);
2644 vec_perm_indices indices (elts, 1,
2645 TYPE_VECTOR_SUBPARTS (char_vectype));
2646 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2648 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2649 undo the argument promotion. */
2650 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2652 def = vect_recog_temp_ssa_var (type, NULL);
2653 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2654 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2655 oprnd0 = def;
2658 /* Pattern detected. */
2659 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2661 *type_out = vectype;
2663 /* Pattern supported. Create a stmt to be used to replace the
2664 pattern, with the unpromoted argument. */
2665 var = vect_recog_temp_ssa_var (type, NULL);
2666 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2667 1, oprnd0);
2668 gimple_call_set_lhs (pattern_stmt, var);
2669 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2670 gimple_call_fntype (last_stmt));
2671 return pattern_stmt;
2675 oprnd1 = build_int_cst (integer_type_node, 8);
2676 rhs_code = LROTATE_EXPR;
2677 bswap16_p = true;
2679 else
2680 return NULL;
2682 if (TREE_CODE (oprnd0) != SSA_NAME
2683 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2684 || !INTEGRAL_TYPE_P (type)
2685 || !TYPE_UNSIGNED (type))
2686 return NULL;
2688 stmt_vec_info def_stmt_info;
2689 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2690 return NULL;
2692 if (dt != vect_internal_def
2693 && dt != vect_constant_def
2694 && dt != vect_external_def)
2695 return NULL;
2697 vectype = get_vectype_for_scalar_type (vinfo, type);
2698 if (vectype == NULL_TREE)
2699 return NULL;
2701 /* If vector/vector or vector/scalar rotate is supported by the target,
2702 don't do anything here. */
2703 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2704 if (optab1
2705 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2707 use_rotate:
2708 if (bswap16_p)
2710 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2712 def = vect_recog_temp_ssa_var (type, NULL);
2713 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2714 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2715 oprnd0 = def;
2718 /* Pattern detected. */
2719 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2721 *type_out = vectype;
2723 /* Pattern supported. Create a stmt to be used to replace the
2724 pattern. */
2725 var = vect_recog_temp_ssa_var (type, NULL);
2726 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2727 oprnd1);
2728 return pattern_stmt;
2730 return NULL;
2733 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2735 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2736 if (optab2
2737 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2738 goto use_rotate;
2741 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2742 don't do anything here either. */
2743 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2744 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2745 if (!optab1
2746 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2747 || !optab2
2748 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2750 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2751 return NULL;
2752 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2753 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2754 if (!optab1
2755 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2756 || !optab2
2757 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2758 return NULL;
2761 *type_out = vectype;
2763 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2765 def = vect_recog_temp_ssa_var (type, NULL);
2766 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2767 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2768 oprnd0 = def;
2771 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2772 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2774 def = NULL_TREE;
2775 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2776 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2777 def = oprnd1;
2778 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2780 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2781 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2782 && TYPE_PRECISION (TREE_TYPE (rhs1))
2783 == TYPE_PRECISION (type))
2784 def = rhs1;
2787 if (def == NULL_TREE)
2789 def = vect_recog_temp_ssa_var (type, NULL);
2790 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2791 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2793 stype = TREE_TYPE (def);
2795 if (TREE_CODE (def) == INTEGER_CST)
2797 if (!tree_fits_uhwi_p (def)
2798 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2799 || integer_zerop (def))
2800 return NULL;
2801 def2 = build_int_cst (stype,
2802 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2804 else
2806 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2808 if (vecstype == NULL_TREE)
2809 return NULL;
2810 def2 = vect_recog_temp_ssa_var (stype, NULL);
2811 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2812 if (ext_def)
2814 basic_block new_bb
2815 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2816 gcc_assert (!new_bb);
2818 else
2819 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2821 def2 = vect_recog_temp_ssa_var (stype, NULL);
2822 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2823 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2824 gimple_assign_lhs (def_stmt), mask);
2825 if (ext_def)
2827 basic_block new_bb
2828 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2829 gcc_assert (!new_bb);
2831 else
2832 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2835 var1 = vect_recog_temp_ssa_var (type, NULL);
2836 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2837 ? LSHIFT_EXPR : RSHIFT_EXPR,
2838 oprnd0, def);
2839 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2841 var2 = vect_recog_temp_ssa_var (type, NULL);
2842 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2843 ? RSHIFT_EXPR : LSHIFT_EXPR,
2844 oprnd0, def2);
2845 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2847 /* Pattern detected. */
2848 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2850 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2851 var = vect_recog_temp_ssa_var (type, NULL);
2852 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2854 return pattern_stmt;
2857 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2858 vectorized:
2860 type a_t;
2861 TYPE b_T, res_T;
2863 S1 a_t = ;
2864 S2 b_T = ;
2865 S3 res_T = b_T op a_t;
2867 where type 'TYPE' is a type with a different size than 'type',
2868 and op is <<, >> or rotate.
2870 Also detect cases:
2872 type a_t;
2873 TYPE b_T, c_T, res_T;
2875 S0 c_T = ;
2876 S1 a_t = (type) c_T;
2877 S2 b_T = ;
2878 S3 res_T = b_T op a_t;
2880 Input/Output:
2882 * STMT_VINFO: The stmt from which the pattern search begins,
2883 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2884 with a shift/rotate which has the same type on both operands, in the
2885 second case just b_T op c_T, in the first case with an added cast
2886 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2888 Output:
2890 * TYPE_OUT: The type of the output of this pattern.
2892 * Return value: A new stmt that will be used to replace the shift/rotate
2893 S3 stmt. */
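/* Illustrative instantiation (types assumed): for

     unsigned char a_t;
     unsigned int b_T, res_T;
     res_T = b_T << a_t;

   the pattern adds c_T = (unsigned int) a_t to the pattern def sequence and
   replaces S3 with res_T = b_T << c_T, so both operands of the shift get
   the same vector type.  */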
2895 static gimple *
2896 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2897 stmt_vec_info stmt_vinfo,
2898 tree *type_out)
2900 gimple *last_stmt = stmt_vinfo->stmt;
2901 tree oprnd0, oprnd1, lhs, var;
2902 gimple *pattern_stmt;
2903 enum tree_code rhs_code;
2905 if (!is_gimple_assign (last_stmt))
2906 return NULL;
2908 rhs_code = gimple_assign_rhs_code (last_stmt);
2909 switch (rhs_code)
2911 case LSHIFT_EXPR:
2912 case RSHIFT_EXPR:
2913 case LROTATE_EXPR:
2914 case RROTATE_EXPR:
2915 break;
2916 default:
2917 return NULL;
2920 lhs = gimple_assign_lhs (last_stmt);
2921 oprnd0 = gimple_assign_rhs1 (last_stmt);
2922 oprnd1 = gimple_assign_rhs2 (last_stmt);
2923 if (TREE_CODE (oprnd0) != SSA_NAME
2924 || TREE_CODE (oprnd1) != SSA_NAME
2925 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2926 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2927 || TYPE_PRECISION (TREE_TYPE (lhs))
2928 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2929 return NULL;
2931 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2932 if (!def_vinfo)
2933 return NULL;
2935 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2936 if (*type_out == NULL_TREE)
2937 return NULL;
2939 tree def = NULL_TREE;
2940 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2941 if (def_stmt && gimple_assign_cast_p (def_stmt))
2943 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2944 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2945 && TYPE_PRECISION (TREE_TYPE (rhs1))
2946 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2948 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2949 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2950 def = rhs1;
2951 else
2953 tree mask
2954 = build_low_bits_mask (TREE_TYPE (rhs1),
2955 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2956 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2957 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2958 tree vecstype = get_vectype_for_scalar_type (vinfo,
2959 TREE_TYPE (rhs1));
2960 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2965 if (def == NULL_TREE)
2967 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2968 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2969 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2972 /* Pattern detected. */
2973 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2975 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2976 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2977 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2979 return pattern_stmt;
2982 /* Return true iff the target has a vector optab implementing the operation
2983 CODE on type VECTYPE. */
2985 static bool
2986 target_has_vecop_for_code (tree_code code, tree vectype)
2988 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2989 return voptab
2990 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2993 /* Verify that the target has optabs of VECTYPE to perform all the steps
2994 needed by the multiplication-by-immediate synthesis algorithm described by
2995 ALG and VAR. If SYNTH_SHIFT_P is true, ensure that vector addition is
2996 present. Return true iff the target supports all the steps. */
2998 static bool
2999 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3000 tree vectype, bool synth_shift_p)
3002 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3003 return false;
3005 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3006 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3008 if (var == negate_variant
3009 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3010 return false;
3012 /* If we must synthesize shifts with additions make sure that vector
3013 addition is available. */
3014 if ((var == add_variant || synth_shift_p) && !supports_vplus)
3015 return false;
3017 for (int i = 1; i < alg->ops; i++)
3019 switch (alg->op[i])
3021 case alg_shift:
3022 break;
3023 case alg_add_t_m2:
3024 case alg_add_t2_m:
3025 case alg_add_factor:
3026 if (!supports_vplus)
3027 return false;
3028 break;
3029 case alg_sub_t_m2:
3030 case alg_sub_t2_m:
3031 case alg_sub_factor:
3032 if (!supports_vminus)
3033 return false;
3034 break;
3035 case alg_unknown:
3036 case alg_m:
3037 case alg_zero:
3038 case alg_impossible:
3039 return false;
3040 default:
3041 gcc_unreachable ();
3045 return true;
3048 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3049 putting the final result in DEST. Append all statements but the last into
3050 VINFO. Return the last statement. */
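/* E.g. (illustrative) a left shift by 3 becomes three self-additions:
     t1 = op + op;  t2 = t1 + t1;  dest = t2 + t2;   // dest = op << 3  */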
3052 static gimple *
3053 synth_lshift_by_additions (vec_info *vinfo,
3054 tree dest, tree op, HOST_WIDE_INT amnt,
3055 stmt_vec_info stmt_info)
3057 HOST_WIDE_INT i;
3058 tree itype = TREE_TYPE (op);
3059 tree prev_res = op;
3060 gcc_assert (amnt >= 0);
3061 for (i = 0; i < amnt; i++)
3063 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3064 : dest;
3065 gimple *stmt
3066 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3067 prev_res = tmp_var;
3068 if (i < amnt - 1)
3069 append_pattern_def_seq (vinfo, stmt_info, stmt);
3070 else
3071 return stmt;
3073 gcc_unreachable ();
3074 return NULL;
3077 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3078 CODE to operands OP1 and OP2, creating a new temporary SSA var in
3079 the process if necessary. Append the resulting assignment statements
3080 to the sequence in STMT_VINFO. Return the SSA variable that holds the
3081 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3082 left shifts using additions. */
3084 static tree
3085 apply_binop_and_append_stmt (vec_info *vinfo,
3086 tree_code code, tree op1, tree op2,
3087 stmt_vec_info stmt_vinfo, bool synth_shift_p)
3089 if (integer_zerop (op2)
3090 && (code == LSHIFT_EXPR
3091 || code == PLUS_EXPR))
3093 gcc_assert (TREE_CODE (op1) == SSA_NAME);
3094 return op1;
3097 gimple *stmt;
3098 tree itype = TREE_TYPE (op1);
3099 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3101 if (code == LSHIFT_EXPR
3102 && synth_shift_p)
3104 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3105 TREE_INT_CST_LOW (op2), stmt_vinfo);
3106 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3107 return tmp_var;
3110 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3111 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3112 return tmp_var;
3115 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3116 and simple arithmetic operations to be vectorized. Record the statements
3117 produced in STMT_VINFO and return the last statement in the sequence or
3118 NULL if it's not possible to synthesize such a multiplication.
3119 This function mirrors the behavior of expand_mult_const in expmed.cc but
3120 works on tree-ssa form. */
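/* For example (illustrative), a multiplication by 9 can be synthesized as

     t1 = op << 3;
     res = t1 + op;

   and a multiplication by 15 as t1 = op << 4; res = t1 - op, with the
   shifts themselves expanded into additions on targets without vector
   shifts.  */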
3122 static gimple *
3123 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3124 stmt_vec_info stmt_vinfo)
3126 tree itype = TREE_TYPE (op);
3127 machine_mode mode = TYPE_MODE (itype);
3128 struct algorithm alg;
3129 mult_variant variant;
3130 if (!tree_fits_shwi_p (val))
3131 return NULL;
3133 /* Multiplication synthesis by shifts, adds and subs can introduce
3134 signed overflow where the original operation didn't. Perform the
3135 operations on an unsigned type and cast back to avoid this.
3136 In the future we may want to relax this for synthesis algorithms
3137 that we can prove do not cause unexpected overflow. */
3138 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3140 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3141 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3142 if (!vectype)
3143 return NULL;
3145 /* Targets that don't support vector shifts but support vector additions
3146 can synthesize shifts that way. */
3147 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3149 HOST_WIDE_INT hwval = tree_to_shwi (val);
3150 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3151 The vectorizer's benefit analysis will decide whether it's beneficial
3152 to do this. */
3153 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3154 ? TYPE_MODE (vectype) : mode,
3155 hwval, &alg, &variant, MAX_COST);
3156 if (!possible)
3157 return NULL;
3159 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3160 return NULL;
3162 tree accumulator;
3164 /* Clear out the sequence of statements so we can populate it below. */
3165 gimple *stmt = NULL;
3167 if (cast_to_unsigned_p)
3169 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3170 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3171 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3172 op = tmp_op;
3175 if (alg.op[0] == alg_zero)
3176 accumulator = build_int_cst (multtype, 0);
3177 else
3178 accumulator = op;
3180 bool needs_fixup = (variant == negate_variant)
3181 || (variant == add_variant);
3183 for (int i = 1; i < alg.ops; i++)
3185 tree shft_log = build_int_cst (multtype, alg.log[i]);
3186 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3187 tree tmp_var = NULL_TREE;
3189 switch (alg.op[i])
3191 case alg_shift:
3192 if (synth_shift_p)
3193 stmt
3194 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3195 alg.log[i], stmt_vinfo);
3196 else
3197 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3198 shft_log);
3199 break;
3200 case alg_add_t_m2:
3201 tmp_var
3202 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3203 stmt_vinfo, synth_shift_p);
3204 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3205 tmp_var);
3206 break;
3207 case alg_sub_t_m2:
3208 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3209 shft_log, stmt_vinfo,
3210 synth_shift_p);
3211 /* In some algorithms the first step involves zeroing the
3212 accumulator. If subtracting from such an accumulator
3213 just emit the negation directly. */
3214 if (integer_zerop (accumulator))
3215 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3216 else
3217 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3218 tmp_var);
3219 break;
3220 case alg_add_t2_m:
3221 tmp_var
3222 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3223 shft_log, stmt_vinfo, synth_shift_p);
3224 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3225 break;
3226 case alg_sub_t2_m:
3227 tmp_var
3228 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3229 shft_log, stmt_vinfo, synth_shift_p);
3230 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3231 break;
3232 case alg_add_factor:
3233 tmp_var
3234 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3235 shft_log, stmt_vinfo, synth_shift_p);
3236 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3237 tmp_var);
3238 break;
3239 case alg_sub_factor:
3240 tmp_var
3241 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3242 shft_log, stmt_vinfo, synth_shift_p);
3243 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3244 accumulator);
3245 break;
3246 default:
3247 gcc_unreachable ();
3249 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3250 but rather return it directly. */
3252 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3253 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3254 accumulator = accum_tmp;
3256 if (variant == negate_variant)
3258 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3259 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3260 accumulator = accum_tmp;
3261 if (cast_to_unsigned_p)
3262 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3264 else if (variant == add_variant)
3266 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3267 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3268 accumulator = accum_tmp;
3269 if (cast_to_unsigned_p)
3270 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3272 /* Cast back to the original signed type if needed. */
3273 if (cast_to_unsigned_p)
3275 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3276 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3279 return stmt;
3282 /* Detect multiplication by a constant and convert it into a sequence of
3283 shifts, additions, subtractions and negations. We reuse the
3284 choose_mult_variant algorithms from expmed.cc.
3286 Input/Output:
3288 STMT_VINFO: The stmt from which the pattern search begins,
3289 i.e. the mult stmt.
3291 Output:
3293 * TYPE_OUT: The type of the output of this pattern.
3295 * Return value: A new stmt that will be used to replace
3296 the multiplication. */
3298 static gimple *
3299 vect_recog_mult_pattern (vec_info *vinfo,
3300 stmt_vec_info stmt_vinfo, tree *type_out)
3302 gimple *last_stmt = stmt_vinfo->stmt;
3303 tree oprnd0, oprnd1, vectype, itype;
3304 gimple *pattern_stmt;
3306 if (!is_gimple_assign (last_stmt))
3307 return NULL;
3309 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3310 return NULL;
3312 oprnd0 = gimple_assign_rhs1 (last_stmt);
3313 oprnd1 = gimple_assign_rhs2 (last_stmt);
3314 itype = TREE_TYPE (oprnd0);
3316 if (TREE_CODE (oprnd0) != SSA_NAME
3317 || TREE_CODE (oprnd1) != INTEGER_CST
3318 || !INTEGRAL_TYPE_P (itype)
3319 || !type_has_mode_precision_p (itype))
3320 return NULL;
3322 vectype = get_vectype_for_scalar_type (vinfo, itype);
3323 if (vectype == NULL_TREE)
3324 return NULL;
3326 /* If the target can handle vectorized multiplication natively,
3327 don't attempt to optimize this. */
3328 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3329 if (mul_optab != unknown_optab)
3331 machine_mode vec_mode = TYPE_MODE (vectype);
3332 int icode = (int) optab_handler (mul_optab, vec_mode);
3333 if (icode != CODE_FOR_nothing)
3334 return NULL;
3337 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3338 oprnd0, oprnd1, stmt_vinfo);
3339 if (!pattern_stmt)
3340 return NULL;
3342 /* Pattern detected. */
3343 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3345 *type_out = vectype;
3347 return pattern_stmt;
3350 /* Detect a signed division by a constant that wouldn't be
3351 otherwise vectorized:
3353 type a_t, b_t;
3355 S1 a_t = b_t / N;
3357 where type 'type' is an integral type and N is a constant.
3359 Similarly handle modulo by a constant:
3361 S4 a_t = b_t % N;
3363 Input/Output:
3365 * STMT_VINFO: The stmt from which the pattern search begins,
3366 i.e. the division stmt. If N is a power of two constant and
3367 type is signed, S1 is replaced by:
3368 S3 y_t = b_t < 0 ? N - 1 : 0;
3369 S2 x_t = b_t + y_t;
3370 S1' a_t = x_t >> log2 (N);
3372 If N is a power of two constant and type is signed, S4 is
3373 replaced by (where *_T temporaries have unsigned type):
3374 S9 y_T = b_t < 0 ? -1U : 0U;
3375 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3376 S7 z_t = (type) z_T;
3377 S6 w_t = b_t + z_t;
3378 S5 x_t = w_t & (N - 1);
3379 S4' a_t = x_t - z_t;
3381 Output:
3383 * TYPE_OUT: The type of the output of this pattern.
3385 * Return value: A new stmt that will be used to replace the division
3386 S1 or modulo S4 stmt. */
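/* Worked example (illustrative): for signed a_t = b_t / 4 the replacement
   above becomes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   i.e. negative dividends are biased by N - 1 so that the arithmetic shift
   rounds towards zero, matching C division.  */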
3388 static gimple *
3389 vect_recog_divmod_pattern (vec_info *vinfo,
3390 stmt_vec_info stmt_vinfo, tree *type_out)
3392 gimple *last_stmt = stmt_vinfo->stmt;
3393 tree oprnd0, oprnd1, vectype, itype, cond;
3394 gimple *pattern_stmt, *def_stmt;
3395 enum tree_code rhs_code;
3396 optab optab;
3397 tree q;
3398 int dummy_int, prec;
3400 if (!is_gimple_assign (last_stmt))
3401 return NULL;
3403 rhs_code = gimple_assign_rhs_code (last_stmt);
3404 switch (rhs_code)
3406 case TRUNC_DIV_EXPR:
3407 case EXACT_DIV_EXPR:
3408 case TRUNC_MOD_EXPR:
3409 break;
3410 default:
3411 return NULL;
3414 oprnd0 = gimple_assign_rhs1 (last_stmt);
3415 oprnd1 = gimple_assign_rhs2 (last_stmt);
3416 itype = TREE_TYPE (oprnd0);
3417 if (TREE_CODE (oprnd0) != SSA_NAME
3418 || TREE_CODE (oprnd1) != INTEGER_CST
3419 || TREE_CODE (itype) != INTEGER_TYPE
3420 || !type_has_mode_precision_p (itype))
3421 return NULL;
3423 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3424 vectype = get_vectype_for_scalar_type (vinfo, itype);
3425 if (vectype == NULL_TREE)
3426 return NULL;
3428 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3430 /* If the target can handle vectorized division or modulo natively,
3431 don't attempt to optimize this, since native division is likely
3432 to give smaller code. */
3433 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3434 if (optab != unknown_optab)
3436 machine_mode vec_mode = TYPE_MODE (vectype);
3437 int icode = (int) optab_handler (optab, vec_mode);
3438 if (icode != CODE_FOR_nothing)
3439 return NULL;
3443 prec = TYPE_PRECISION (itype);
3444 if (integer_pow2p (oprnd1))
3446 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3447 return NULL;
3449 /* Pattern detected. */
3450 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3452 *type_out = vectype;
3454 /* Check if the target supports this internal function. */
3455 internal_fn ifn = IFN_DIV_POW2;
3456 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3458 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3460 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3461 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3462 gimple_call_set_lhs (div_stmt, var_div);
3464 if (rhs_code == TRUNC_MOD_EXPR)
3466 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3467 def_stmt
3468 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3469 LSHIFT_EXPR, var_div, shift);
3470 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3471 pattern_stmt
3472 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3473 MINUS_EXPR, oprnd0,
3474 gimple_assign_lhs (def_stmt));
3476 else
3477 pattern_stmt = div_stmt;
3478 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3480 return pattern_stmt;
3483 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3484 build_int_cst (itype, 0));
3485 if (rhs_code == TRUNC_DIV_EXPR
3486 || rhs_code == EXACT_DIV_EXPR)
3488 tree var = vect_recog_temp_ssa_var (itype, NULL);
3489 tree shift;
3490 def_stmt
3491 = gimple_build_assign (var, COND_EXPR, cond,
3492 fold_build2 (MINUS_EXPR, itype, oprnd1,
3493 build_int_cst (itype, 1)),
3494 build_int_cst (itype, 0));
3495 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3496 var = vect_recog_temp_ssa_var (itype, NULL);
3497 def_stmt
3498 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3499 gimple_assign_lhs (def_stmt));
3500 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3502 shift = build_int_cst (itype, tree_log2 (oprnd1));
3503 pattern_stmt
3504 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3505 RSHIFT_EXPR, var, shift);
3507 else
3509 tree signmask;
3510 if (compare_tree_int (oprnd1, 2) == 0)
3512 signmask = vect_recog_temp_ssa_var (itype, NULL);
3513 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3514 build_int_cst (itype, 1),
3515 build_int_cst (itype, 0));
3516 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3518 else
3520 tree utype
3521 = build_nonstandard_integer_type (prec, 1);
3522 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3523 tree shift
3524 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3525 - tree_log2 (oprnd1));
3526 tree var = vect_recog_temp_ssa_var (utype, NULL);
3528 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3529 build_int_cst (utype, -1),
3530 build_int_cst (utype, 0));
3531 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3532 var = vect_recog_temp_ssa_var (utype, NULL);
3533 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3534 gimple_assign_lhs (def_stmt),
3535 shift);
3536 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3537 signmask = vect_recog_temp_ssa_var (itype, NULL);
3538 def_stmt
3539 = gimple_build_assign (signmask, NOP_EXPR, var);
3540 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3542 def_stmt
3543 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3544 PLUS_EXPR, oprnd0, signmask);
3545 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3546 def_stmt
3547 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3548 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3549 fold_build2 (MINUS_EXPR, itype, oprnd1,
3550 build_int_cst (itype, 1)));
3551 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3553 pattern_stmt
3554 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3555 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3556 signmask);
3559 return pattern_stmt;
3562 if (prec > HOST_BITS_PER_WIDE_INT
3563 || integer_zerop (oprnd1))
3564 return NULL;
3566 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3567 return NULL;
3569 if (TYPE_UNSIGNED (itype))
3571 unsigned HOST_WIDE_INT mh, ml;
3572 int pre_shift, post_shift;
3573 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3574 & GET_MODE_MASK (itype_mode));
3575 tree t1, t2, t3, t4;
3577 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3578 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3579 return NULL;
3581 /* Find a suitable multiplier and right shift count
3582 instead of multiplying with D. */
3583 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3585 /* If the suggested multiplier is more than PREC bits, we can do better
3586 for even divisors, using an initial right shift. */
3587 if (mh != 0 && (d & 1) == 0)
3589 pre_shift = ctz_or_zero (d);
3590 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3591 &ml, &post_shift, &dummy_int);
3592 gcc_assert (!mh);
3594 else
3595 pre_shift = 0;
3597 if (mh != 0)
3599 if (post_shift - 1 >= prec)
3600 return NULL;
3602 /* t1 = oprnd0 h* ml;
3603 t2 = oprnd0 - t1;
3604 t3 = t2 >> 1;
3605 t4 = t1 + t3;
3606 q = t4 >> (post_shift - 1); */
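/* Here the real multiplier needs PREC + 1 bits (its leading bit is the MH
   returned by choose_multiplier), so oprnd0 h* ml only accounts for the
   low PREC bits of it.  Adding oprnd0 back directly could overflow, so it
   is folded in as t1 + ((oprnd0 - t1) >> 1), which computes
   (oprnd0 + t1) >> 1 without overflow, leaving post_shift - 1 bits still
   to shift.  */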
3607 t1 = vect_recog_temp_ssa_var (itype, NULL);
3608 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3609 build_int_cst (itype, ml));
3610 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3612 t2 = vect_recog_temp_ssa_var (itype, NULL);
3613 def_stmt
3614 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3615 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3617 t3 = vect_recog_temp_ssa_var (itype, NULL);
3618 def_stmt
3619 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3620 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3622 t4 = vect_recog_temp_ssa_var (itype, NULL);
3623 def_stmt
3624 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3626 if (post_shift != 1)
3628 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3630 q = vect_recog_temp_ssa_var (itype, NULL);
3631 pattern_stmt
3632 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3633 build_int_cst (itype, post_shift - 1));
3635 else
3637 q = t4;
3638 pattern_stmt = def_stmt;
3641 else
3643 if (pre_shift >= prec || post_shift >= prec)
3644 return NULL;
3646 /* t1 = oprnd0 >> pre_shift;
3647 t2 = t1 h* ml;
3648 q = t2 >> post_shift; */
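/* For example (values as produced by choose_multiplier, shown only for
   illustration): dividing a 32-bit unsigned value by 10 uses
   PRE_SHIFT == 0, ML == 0xCCCCCCCD and POST_SHIFT == 3, i.e.

     q = (oprnd0 h* 0xCCCCCCCD) >> 3;  */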
3649 if (pre_shift)
3651 t1 = vect_recog_temp_ssa_var (itype, NULL);
3652 def_stmt
3653 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3654 build_int_cst (NULL, pre_shift));
3655 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3657 else
3658 t1 = oprnd0;
3660 t2 = vect_recog_temp_ssa_var (itype, NULL);
3661 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3662 build_int_cst (itype, ml));
3664 if (post_shift)
3666 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3668 q = vect_recog_temp_ssa_var (itype, NULL);
3669 def_stmt
3670 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3671 build_int_cst (itype, post_shift));
3673 else
3674 q = t2;
3676 pattern_stmt = def_stmt;
3679 else
3681 unsigned HOST_WIDE_INT ml;
3682 int post_shift;
3683 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3684 unsigned HOST_WIDE_INT abs_d;
3685 bool add = false;
3686 tree t1, t2, t3, t4;
3688 /* Give up for -1. */
3689 if (d == -1)
3690 return NULL;
3692 /* Since d might be INT_MIN, we have to cast to
3693 unsigned HOST_WIDE_INT before negating to avoid
3694 undefined signed overflow. */
3695 abs_d = (d >= 0
3696 ? (unsigned HOST_WIDE_INT) d
3697 : - (unsigned HOST_WIDE_INT) d);
3699 /* n rem d = n rem -d */
3700 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3702 d = abs_d;
3703 oprnd1 = build_int_cst (itype, abs_d);
3705 if (HOST_BITS_PER_WIDE_INT >= prec
3706 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3707 /* This case is not handled correctly below. */
3708 return NULL;
3710 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3711 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3713 add = true;
3714 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3716 if (post_shift >= prec)
3717 return NULL;
3719 /* t1 = oprnd0 h* ml; */
3720 t1 = vect_recog_temp_ssa_var (itype, NULL);
3721 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3722 build_int_cst (itype, ml));
3724 if (add)
3726 /* t2 = t1 + oprnd0; */
3727 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3728 t2 = vect_recog_temp_ssa_var (itype, NULL);
3729 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3731 else
3732 t2 = t1;
3734 if (post_shift)
3736 /* t3 = t2 >> post_shift; */
3737 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3738 t3 = vect_recog_temp_ssa_var (itype, NULL);
3739 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3740 build_int_cst (itype, post_shift));
3742 else
3743 t3 = t2;
3745 int msb = 1;
3746 value_range r;
3747 get_range_query (cfun)->range_of_expr (r, oprnd0);
3748 if (r.kind () == VR_RANGE)
3750 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3751 msb = 0;
3752 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3753 msb = -1;
3756 if (msb == 0 && d >= 0)
3758 /* q = t3; */
3759 q = t3;
3760 pattern_stmt = def_stmt;
3762 else
3764 /* t4 = oprnd0 >> (prec - 1);
3765 or if we know from VRP that oprnd0 >= 0
3766 t4 = 0;
3767 or if we know from VRP that oprnd0 < 0
3768 t4 = -1; */
3769 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3770 t4 = vect_recog_temp_ssa_var (itype, NULL);
3771 if (msb != 1)
3772 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3773 build_int_cst (itype, msb));
3774 else
3775 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3776 build_int_cst (itype, prec - 1));
3777 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3779 /* q = t3 - t4; or q = t4 - t3; */
3780 q = vect_recog_temp_ssa_var (itype, NULL);
3781 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3782 d < 0 ? t3 : t4);
3786 if (rhs_code == TRUNC_MOD_EXPR)
3788 tree r, t1;
3790 /* We divided. Now finish by:
3791 t1 = q * oprnd1;
3792 r = oprnd0 - t1; */
3793 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3795 t1 = vect_recog_temp_ssa_var (itype, NULL);
3796 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3797 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3799 r = vect_recog_temp_ssa_var (itype, NULL);
3800 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3803 /* Pattern detected. */
3804 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3806 *type_out = vectype;
3807 return pattern_stmt;
3810 /* Function vect_recog_mixed_size_cond_pattern
3812 Try to find the following pattern:
3814 type x_t, y_t;
3815 TYPE a_T, b_T, c_T;
3816 loop:
3817 S1 a_T = x_t CMP y_t ? b_T : c_T;
3819 where type 'TYPE' is an integral type which has a different size
3820 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3821 than 'type', the constants need to fit into an integer type
3822 with the same width as 'type') or results of conversion from 'type'.
3824 Input:
3826 * STMT_VINFO: The stmt from which the pattern search begins.
3828 Output:
3830 * TYPE_OUT: The type of the output of this pattern.
3832 * Return value: A new stmt that will be used to replace the pattern.
3833 Additionally a def_stmt is added.
3835 a_it = x_t CMP y_t ? b_it : c_it;
3836 a_T = (TYPE) a_it; */
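/* A concrete sketch (assuming 16-bit 'short' and 32-bit 'int'):

     short x_t, y_t;
     int a_T;
     a_T = x_t < y_t ? 5 : 17;

   is rewritten so that the COND_EXPR is done in the comparison's width:

     short a_it = x_t < y_t ? 5 : 17;
     a_T = (int) a_it;  */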
3838 static gimple *
3839 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3840 stmt_vec_info stmt_vinfo, tree *type_out)
3842 gimple *last_stmt = stmt_vinfo->stmt;
3843 tree cond_expr, then_clause, else_clause;
3844 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3845 gimple *pattern_stmt, *def_stmt;
3846 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3847 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3848 bool promotion;
3849 tree comp_scalar_type;
3851 if (!is_gimple_assign (last_stmt)
3852 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3853 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3854 return NULL;
3856 cond_expr = gimple_assign_rhs1 (last_stmt);
3857 then_clause = gimple_assign_rhs2 (last_stmt);
3858 else_clause = gimple_assign_rhs3 (last_stmt);
3860 if (!COMPARISON_CLASS_P (cond_expr))
3861 return NULL;
3863 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3864 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3865 if (comp_vectype == NULL_TREE)
3866 return NULL;
3868 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3869 if (types_compatible_p (type, comp_scalar_type)
3870 || ((TREE_CODE (then_clause) != INTEGER_CST
3871 || TREE_CODE (else_clause) != INTEGER_CST)
3872 && !INTEGRAL_TYPE_P (comp_scalar_type))
3873 || !INTEGRAL_TYPE_P (type))
3874 return NULL;
3876 if ((TREE_CODE (then_clause) != INTEGER_CST
3877 && !type_conversion_p (vinfo, then_clause, false,
3878 &orig_type0, &def_stmt0, &promotion))
3879 || (TREE_CODE (else_clause) != INTEGER_CST
3880 && !type_conversion_p (vinfo, else_clause, false,
3881 &orig_type1, &def_stmt1, &promotion)))
3882 return NULL;
3884 if (orig_type0 && orig_type1
3885 && !types_compatible_p (orig_type0, orig_type1))
3886 return NULL;
3888 if (orig_type0)
3890 if (!types_compatible_p (orig_type0, comp_scalar_type))
3891 return NULL;
3892 then_clause = gimple_assign_rhs1 (def_stmt0);
3893 itype = orig_type0;
3896 if (orig_type1)
3898 if (!types_compatible_p (orig_type1, comp_scalar_type))
3899 return NULL;
3900 else_clause = gimple_assign_rhs1 (def_stmt1);
3901 itype = orig_type1;
3905 HOST_WIDE_INT cmp_mode_size
3906 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3908 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3909 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3910 return NULL;
3912 vectype = get_vectype_for_scalar_type (vinfo, type);
3913 if (vectype == NULL_TREE)
3914 return NULL;
3916 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3917 return NULL;
3919 if (itype == NULL_TREE)
3920 itype = build_nonstandard_integer_type (cmp_mode_size,
3921 TYPE_UNSIGNED (type));
3923 if (itype == NULL_TREE
3924 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3925 return NULL;
3927 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3928 if (vecitype == NULL_TREE)
3929 return NULL;
3931 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3932 return NULL;
3934 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3936 if ((TREE_CODE (then_clause) == INTEGER_CST
3937 && !int_fits_type_p (then_clause, itype))
3938 || (TREE_CODE (else_clause) == INTEGER_CST
3939 && !int_fits_type_p (else_clause, itype)))
3940 return NULL;
3943 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3944 COND_EXPR, unshare_expr (cond_expr),
3945 fold_convert (itype, then_clause),
3946 fold_convert (itype, else_clause));
3947 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3948 NOP_EXPR, gimple_assign_lhs (def_stmt));
3950 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3951 *type_out = vectype;
3953 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3955 return pattern_stmt;
3959 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3960 true if bool VAR can and should be optimized that way. Assume it shouldn't
3961 in case it's a result of a comparison which can be directly vectorized into
3962 a vector comparison. Fills in STMTS with all stmts visited during the
3963 walk. */
3965 static bool
3966 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3968 tree rhs1;
3969 enum tree_code rhs_code;
3971 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3972 if (!def_stmt_info)
3973 return false;
3975 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3976 if (!def_stmt)
3977 return false;
3979 if (stmts.contains (def_stmt))
3980 return true;
3982 rhs1 = gimple_assign_rhs1 (def_stmt);
3983 rhs_code = gimple_assign_rhs_code (def_stmt);
3984 switch (rhs_code)
3986 case SSA_NAME:
3987 if (! check_bool_pattern (rhs1, vinfo, stmts))
3988 return false;
3989 break;
3991 CASE_CONVERT:
3992 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3993 return false;
3994 if (! check_bool_pattern (rhs1, vinfo, stmts))
3995 return false;
3996 break;
3998 case BIT_NOT_EXPR:
3999 if (! check_bool_pattern (rhs1, vinfo, stmts))
4000 return false;
4001 break;
4003 case BIT_AND_EXPR:
4004 case BIT_IOR_EXPR:
4005 case BIT_XOR_EXPR:
4006 if (! check_bool_pattern (rhs1, vinfo, stmts)
4007 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4008 return false;
4009 break;
4011 default:
4012 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4014 tree vecitype, comp_vectype;
4016 /* If the comparison can throw, then is_gimple_condexpr will be
4017 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4018 if (stmt_could_throw_p (cfun, def_stmt))
4019 return false;
4021 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4022 if (comp_vectype == NULL_TREE)
4023 return false;
4025 tree mask_type = get_mask_type_for_scalar_type (vinfo,
4026 TREE_TYPE (rhs1));
4027 if (mask_type
4028 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4029 return false;
4031 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4033 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4034 tree itype
4035 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4036 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4037 if (vecitype == NULL_TREE)
4038 return false;
4040 else
4041 vecitype = comp_vectype;
4042 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4043 return false;
4045 else
4046 return false;
4047 break;
4050 bool res = stmts.add (def_stmt);
4051 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4052 gcc_assert (!res);
4054 return true;
4058 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
4059 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
4060 pattern sequence. */
4062 static tree
4063 adjust_bool_pattern_cast (vec_info *vinfo,
4064 tree type, tree var, stmt_vec_info stmt_info)
4066 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4067 NOP_EXPR, var);
4068 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4069 get_vectype_for_scalar_type (vinfo, type));
4070 return gimple_assign_lhs (cast_stmt);
4073 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4074 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4075 type, OUT_TYPE is the desired final integer type of the whole pattern.
4076 STMT_INFO is the info of the pattern root and is where pattern stmts should
4077 be associated with. DEFS is a map of pattern defs. */
4079 static void
4080 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4081 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4083 gimple *stmt = SSA_NAME_DEF_STMT (var);
4084 enum tree_code rhs_code, def_rhs_code;
4085 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4086 location_t loc;
4087 gimple *pattern_stmt, *def_stmt;
4088 tree trueval = NULL_TREE;
4090 rhs1 = gimple_assign_rhs1 (stmt);
4091 rhs2 = gimple_assign_rhs2 (stmt);
4092 rhs_code = gimple_assign_rhs_code (stmt);
4093 loc = gimple_location (stmt);
4094 switch (rhs_code)
4096 case SSA_NAME:
4097 CASE_CONVERT:
4098 irhs1 = *defs.get (rhs1);
4099 itype = TREE_TYPE (irhs1);
4100 pattern_stmt
4101 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4102 SSA_NAME, irhs1);
4103 break;
4105 case BIT_NOT_EXPR:
4106 irhs1 = *defs.get (rhs1);
4107 itype = TREE_TYPE (irhs1);
4108 pattern_stmt
4109 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4110 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4111 break;
4113 case BIT_AND_EXPR:
4114 /* Try to optimize x = y & (a < b ? 1 : 0); into
4115 x = (a < b ? y : 0);
4117 E.g. for:
4118 bool a_b, b_b, c_b;
4119 TYPE d_T;
4121 S1 a_b = x1 CMP1 y1;
4122 S2 b_b = x2 CMP2 y2;
4123 S3 c_b = a_b & b_b;
4124 S4 d_T = (TYPE) c_b;
4126 we would normally emit:
4128 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4129 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4130 S3' c_T = a_T & b_T;
4131 S4' d_T = c_T;
4133 but we can save one stmt by using the
4134 result of one of the COND_EXPRs in the other COND_EXPR and leave
4135 the BIT_AND_EXPR stmt out:
4137 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4138 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4139 S4' d_T = c_T;
4141 At least when VEC_COND_EXPR is implemented using masks,
4142 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
4143 computes the comparison mask and ANDs it, in one case with an
4144 all-ones vector, in the other case with a vector register.
4145 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4146 often more expensive. */
4147 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4148 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4149 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4151 irhs1 = *defs.get (rhs1);
4152 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4153 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4154 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4156 rhs_code = def_rhs_code;
4157 rhs1 = def_rhs1;
4158 rhs2 = gimple_assign_rhs2 (def_stmt);
4159 trueval = irhs1;
4160 goto do_compare;
4162 else
4163 irhs2 = *defs.get (rhs2);
4164 goto and_ior_xor;
4166 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4167 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4168 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4170 irhs2 = *defs.get (rhs2);
4171 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4172 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4173 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4175 rhs_code = def_rhs_code;
4176 rhs1 = def_rhs1;
4177 rhs2 = gimple_assign_rhs2 (def_stmt);
4178 trueval = irhs2;
4179 goto do_compare;
4181 else
4182 irhs1 = *defs.get (rhs1);
4183 goto and_ior_xor;
4185 /* FALLTHRU */
4186 case BIT_IOR_EXPR:
4187 case BIT_XOR_EXPR:
4188 irhs1 = *defs.get (rhs1);
4189 irhs2 = *defs.get (rhs2);
4190 and_ior_xor:
4191 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4192 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4194 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4195 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4196 int out_prec = TYPE_PRECISION (out_type);
4197 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4198 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4199 stmt_info);
4200 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4201 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4202 stmt_info);
4203 else
4205 irhs1 = adjust_bool_pattern_cast (vinfo,
4206 out_type, irhs1, stmt_info);
4207 irhs2 = adjust_bool_pattern_cast (vinfo,
4208 out_type, irhs2, stmt_info);
4211 itype = TREE_TYPE (irhs1);
4212 pattern_stmt
4213 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4214 rhs_code, irhs1, irhs2);
4215 break;
4217 default:
4218 do_compare:
4219 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4220 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4221 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4222 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4223 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4225 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4226 itype
4227 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4229 else
4230 itype = TREE_TYPE (rhs1);
4231 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4232 if (trueval == NULL_TREE)
4233 trueval = build_int_cst (itype, 1);
4234 else
4235 gcc_checking_assert (useless_type_conversion_p (itype,
4236 TREE_TYPE (trueval)));
4237 pattern_stmt
4238 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4239 COND_EXPR, cond_expr, trueval,
4240 build_int_cst (itype, 0));
4241 break;
4244 gimple_set_location (pattern_stmt, loc);
4245 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4246 get_vectype_for_scalar_type (vinfo, itype));
4247 defs.put (var, gimple_assign_lhs (pattern_stmt));
4250 /* Comparison function to qsort a vector of gimple stmts after UID. */
4252 static int
4253 sort_after_uid (const void *p1, const void *p2)
4255 const gimple *stmt1 = *(const gimple * const *)p1;
4256 const gimple *stmt2 = *(const gimple * const *)p2;
4257 return gimple_uid (stmt1) - gimple_uid (stmt2);
4260 /* Create pattern stmts for all stmts participating in the bool pattern
4261 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4262 OUT_TYPE. Return the def of the pattern root. */
4264 static tree
4265 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4266 tree out_type, stmt_vec_info stmt_info)
4268 /* Gather original stmts in the bool pattern in their order of appearance
4269 in the IL. */
4270 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4271 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4272 i != bool_stmt_set.end (); ++i)
4273 bool_stmts.quick_push (*i);
4274 bool_stmts.qsort (sort_after_uid);
4276 /* Now process them in that order, producing pattern stmts. */
4277 hash_map <tree, tree> defs;
4278 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4279 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4280 out_type, stmt_info, defs);
4282 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4283 gimple *pattern_stmt
4284 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4285 return gimple_assign_lhs (pattern_stmt);
4288 /* Return the proper type for converting bool VAR into
4289 an integer value or NULL_TREE if no such type exists.
4290 The type is chosen so that the converted value has the
4291 same number of elements as VAR's vector type. */
4293 static tree
4294 integer_type_for_mask (tree var, vec_info *vinfo)
4296 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4297 return NULL_TREE;
4299 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4300 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4301 return NULL_TREE;
4303 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4306 /* Function vect_recog_bool_pattern
4308 Try to find pattern like following:
4310 bool a_b, b_b, c_b, d_b, e_b;
4311 TYPE f_T;
4312 loop:
4313 S1 a_b = x1 CMP1 y1;
4314 S2 b_b = x2 CMP2 y2;
4315 S3 c_b = a_b & b_b;
4316 S4 d_b = x3 CMP3 y3;
4317 S5 e_b = c_b | d_b;
4318 S6 f_T = (TYPE) e_b;
4320 where type 'TYPE' is an integral type. Or a similar pattern
4321 ending in
4323 S6 f_Y = e_b ? r_Y : s_Y;
4325 such as results from if-conversion of a complex condition.
4327 Input:
4329 * STMT_VINFO: The stmt at the end from which the pattern
4330 search begins, i.e. cast of a bool to
4331 an integer type.
4333 Output:
4335 * TYPE_OUT: The type of the output of this pattern.
4337 * Return value: A new stmt that will be used to replace the pattern.
4339 Assuming the size of TYPE is the same as the size of all comparisons
4340 (otherwise some casts would be added where needed), for the above
4341 sequence we create related pattern stmts:
4342 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4343 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4344 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4345 S5' e_T = c_T | d_T;
4346 S6' f_T = e_T;
4348 Instead of the above S3' we could emit:
4349 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4350 S3' c_T = a_T | b_T;
4351 but the above is more efficient. */
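/* For the COND_EXPR form S6 f_Y = e_b ? r_Y : s_Y, the generated root
   stmt instead compares the created integer value against zero
   (a sketch):

     S6'  f_Y = e_T != 0 ? r_Y : s_Y;

   where e_T is produced for e_b by a sequence like S1'..S5' above.  */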
4353 static gimple *
4354 vect_recog_bool_pattern (vec_info *vinfo,
4355 stmt_vec_info stmt_vinfo, tree *type_out)
4357 gimple *last_stmt = stmt_vinfo->stmt;
4358 enum tree_code rhs_code;
4359 tree var, lhs, rhs, vectype;
4360 gimple *pattern_stmt;
4362 if (!is_gimple_assign (last_stmt))
4363 return NULL;
4365 var = gimple_assign_rhs1 (last_stmt);
4366 lhs = gimple_assign_lhs (last_stmt);
4367 rhs_code = gimple_assign_rhs_code (last_stmt);
4369 if (rhs_code == VIEW_CONVERT_EXPR)
4370 var = TREE_OPERAND (var, 0);
4372 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4373 return NULL;
4375 hash_set<gimple *> bool_stmts;
4377 if (CONVERT_EXPR_CODE_P (rhs_code)
4378 || rhs_code == VIEW_CONVERT_EXPR)
4380 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4381 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4382 return NULL;
4383 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4385 if (check_bool_pattern (var, vinfo, bool_stmts))
4387 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4388 TREE_TYPE (lhs), stmt_vinfo);
4389 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4390 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4391 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4392 else
4393 pattern_stmt
4394 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4396 else
4398 tree type = integer_type_for_mask (var, vinfo);
4399 tree cst0, cst1, tmp;
4401 if (!type)
4402 return NULL;
4404 /* We may use the cond directly with the narrowed type, avoiding
4405 multiple cond exprs with subsequent result packing, and instead
4406 perform a single cond with the packed mask. In the widening
4407 case it is better to do the cond first and then extract the
4408 results. */
4409 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4410 type = TREE_TYPE (lhs);
4412 cst0 = build_int_cst (type, 0);
4413 cst1 = build_int_cst (type, 1);
4414 tmp = vect_recog_temp_ssa_var (type, NULL);
4415 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4417 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4419 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4420 append_pattern_def_seq (vinfo, stmt_vinfo,
4421 pattern_stmt, new_vectype);
4423 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4424 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4428 *type_out = vectype;
4429 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4431 return pattern_stmt;
4433 else if (rhs_code == COND_EXPR
4434 && TREE_CODE (var) == SSA_NAME)
4436 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4437 if (vectype == NULL_TREE)
4438 return NULL;
4440 /* Build a scalar type for the boolean result that when
4441 vectorized matches the vector type of the result in
4442 size and number of elements. */
4443 unsigned prec
4444 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4445 TYPE_VECTOR_SUBPARTS (vectype));
4447 tree type
4448 = build_nonstandard_integer_type (prec,
4449 TYPE_UNSIGNED (TREE_TYPE (var)));
4450 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4451 return NULL;
4453 if (!check_bool_pattern (var, vinfo, bool_stmts))
4454 return NULL;
4456 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4458 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4459 pattern_stmt
4460 = gimple_build_assign (lhs, COND_EXPR,
4461 build2 (NE_EXPR, boolean_type_node,
4462 rhs, build_int_cst (type, 0)),
4463 gimple_assign_rhs2 (last_stmt),
4464 gimple_assign_rhs3 (last_stmt));
4465 *type_out = vectype;
4466 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4468 return pattern_stmt;
4470 else if (rhs_code == SSA_NAME
4471 && STMT_VINFO_DATA_REF (stmt_vinfo))
4473 stmt_vec_info pattern_stmt_info;
4474 tree nunits_vectype;
4475 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4476 &nunits_vectype)
4477 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4478 return NULL;
4480 if (check_bool_pattern (var, vinfo, bool_stmts))
4481 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4482 TREE_TYPE (vectype), stmt_vinfo);
4483 else
4485 tree type = integer_type_for_mask (var, vinfo);
4486 tree cst0, cst1, new_vectype;
4488 if (!type)
4489 return NULL;
4491 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4492 type = TREE_TYPE (vectype);
4494 cst0 = build_int_cst (type, 0);
4495 cst1 = build_int_cst (type, 1);
4496 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4498 rhs = vect_recog_temp_ssa_var (type, NULL);
4499 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4500 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4503 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4504 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4506 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4507 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4508 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4509 rhs = rhs2;
4511 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4512 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4513 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4514 *type_out = vectype;
4515 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4517 return pattern_stmt;
4519 else
4520 return NULL;
4524 /* A helper for vect_recog_mask_conversion_pattern. Build
4525 conversion of MASK to a type suitable for masking VECTYPE.
4526 Built statement gets required vectype and is appended to
4527 a pattern sequence of STMT_VINFO.
4529 Return converted mask. */
4531 static tree
4532 build_mask_conversion (vec_info *vinfo,
4533 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4535 gimple *stmt;
4536 tree masktype, tmp;
4538 masktype = truth_type_for (vectype);
4539 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4540 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4541 append_pattern_def_seq (vinfo, stmt_vinfo,
4542 stmt, masktype, TREE_TYPE (vectype));
4544 return tmp;
4548 /* Function vect_recog_mask_conversion_pattern
4550 Try to find statements which require boolean type
4551 converison. Additional conversion statements are
4552 added to handle such cases. For example:
4554 bool m_1, m_2, m_3;
4555 int i_4, i_5;
4556 double d_6, d_7;
4557 char c_1, c_2, c_3;
4559 S1 m_1 = i_4 > i_5;
4560 S2 m_2 = d_6 < d_7;
4561 S3 m_3 = m_1 & m_2;
4562 S4 c_1 = m_3 ? c_2 : c_3;
4564 Will be transformed into:
4566 S1 m_1 = i_4 > i_5;
4567 S2 m_2 = d_6 < d_7;
4568 S3'' m_2' = (_Bool[bitsize=32])m_2
4569 S3' m_3' = m_1 & m_2';
4570 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4571 S4' c_1' = m_3'' ? c_2 : c_3; */
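/* The conversions are needed because each comparison naturally produces a
   mask whose element width matches its operands, while the COND_EXPR
   needs a mask matching its result.  As an illustrative sizing only
   (actual widths depend on the target), with 256-bit vectors i_4 > i_5
   yields a mask for 8 int elements, d_6 < d_7 one for 4 double elements,
   and the char COND_EXPR needs one for 32 char elements, hence the S3''
   and S4'' conversions above.  */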
4573 static gimple *
4574 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4575 stmt_vec_info stmt_vinfo, tree *type_out)
4577 gimple *last_stmt = stmt_vinfo->stmt;
4578 enum tree_code rhs_code;
4579 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4580 tree vectype1, vectype2;
4581 stmt_vec_info pattern_stmt_info;
4582 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4583 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4585 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4586 if (is_gimple_call (last_stmt)
4587 && gimple_call_internal_p (last_stmt))
4589 gcall *pattern_stmt;
4591 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4592 int mask_argno = internal_fn_mask_index (ifn);
4593 if (mask_argno < 0)
4594 return NULL;
4596 bool store_p = internal_store_fn_p (ifn);
4597 if (store_p)
4599 int rhs_index = internal_fn_stored_value_index (ifn);
4600 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4601 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4603 else
4605 lhs = gimple_call_lhs (last_stmt);
4606 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4609 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4610 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4611 if (!mask_arg_type)
4612 return NULL;
4613 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4615 if (!vectype1 || !vectype2
4616 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4617 TYPE_VECTOR_SUBPARTS (vectype2)))
4618 return NULL;
4620 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4622 auto_vec<tree, 8> args;
4623 unsigned int nargs = gimple_call_num_args (last_stmt);
4624 args.safe_grow (nargs, true);
4625 for (unsigned int i = 0; i < nargs; ++i)
4626 args[i] = ((int) i == mask_argno
4627 ? tmp
4628 : gimple_call_arg (last_stmt, i));
4629 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4631 if (!store_p)
4633 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4634 gimple_call_set_lhs (pattern_stmt, lhs);
4636 gimple_call_set_nothrow (pattern_stmt, true);
4638 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4639 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4640 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4642 *type_out = vectype1;
4643 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4645 return pattern_stmt;
4648 if (!is_gimple_assign (last_stmt))
4649 return NULL;
4651 gimple *pattern_stmt;
4652 lhs = gimple_assign_lhs (last_stmt);
4653 rhs1 = gimple_assign_rhs1 (last_stmt);
4654 rhs_code = gimple_assign_rhs_code (last_stmt);
4656 /* Check for cond expression requiring mask conversion. */
4657 if (rhs_code == COND_EXPR)
4659 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4661 if (TREE_CODE (rhs1) == SSA_NAME)
4663 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4664 if (!rhs1_type)
4665 return NULL;
4667 else if (COMPARISON_CLASS_P (rhs1))
4669 /* Check whether we're comparing scalar booleans and (if so)
4670 whether a better mask type exists than the mask associated
4671 with boolean-sized elements. This avoids unnecessary packs
4672 and unpacks if the booleans are set from comparisons of
4673 wider types. E.g. in:
4675 int x1, x2, x3, x4, y1, y2;
4677 bool b1 = (x1 == x2);
4678 bool b2 = (x3 == x4);
4679 ... = b1 == b2 ? y1 : y2;
4681 it is better for b1 and b2 to use the mask type associated
4682 with int elements rather than bool (byte) elements. */
4683 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4684 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4685 if (!rhs1_op0 || !rhs1_op1)
4686 return NULL;
4687 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4688 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4690 if (!rhs1_op0_type)
4691 rhs1_type = TREE_TYPE (rhs1_op0);
4692 else if (!rhs1_op1_type)
4693 rhs1_type = TREE_TYPE (rhs1_op1);
4694 else if (TYPE_PRECISION (rhs1_op0_type)
4695 != TYPE_PRECISION (rhs1_op1_type))
4697 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4698 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4699 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4700 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4701 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4703 if (abs (tmp0) > abs (tmp1))
4704 rhs1_type = rhs1_op1_type;
4705 else
4706 rhs1_type = rhs1_op0_type;
4708 else
4709 rhs1_type = build_nonstandard_integer_type
4710 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4712 else
4713 rhs1_type = rhs1_op0_type;
4715 else
4716 return NULL;
4718 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4720 if (!vectype1 || !vectype2)
4721 return NULL;
4723 /* Continue if a conversion is needed. Also continue if we have
4724 a comparison whose vector type would normally be different from
4725 VECTYPE2 when considered in isolation. In that case we'll
4726 replace the comparison with an SSA name (so that we can record
4727 its vector type) and behave as though the comparison was an SSA
4728 name from the outset. */
4729 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4730 TYPE_VECTOR_SUBPARTS (vectype2))
4731 && !rhs1_op0_type
4732 && !rhs1_op1_type)
4733 return NULL;
4735 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4736 in place, we can handle it in vectorizable_condition. This avoids
4737 unnecessary promotion stmts and increased vectorization factor. */
4738 if (COMPARISON_CLASS_P (rhs1)
4739 && INTEGRAL_TYPE_P (rhs1_type)
4740 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4741 TYPE_VECTOR_SUBPARTS (vectype2)))
4743 enum vect_def_type dt;
4744 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4745 && dt == vect_external_def
4746 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4747 && (dt == vect_external_def
4748 || dt == vect_constant_def))
4750 tree wide_scalar_type = build_nonstandard_integer_type
4751 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4752 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4753 wide_scalar_type);
4754 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4755 return NULL;
4759 /* If rhs1 is a comparison we need to move it into a
4760 separate statement. */
4761 if (TREE_CODE (rhs1) != SSA_NAME)
4763 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4764 if (rhs1_op0_type
4765 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4766 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4767 vectype2, stmt_vinfo);
4768 if (rhs1_op1_type
4769 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4770 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4771 vectype2, stmt_vinfo);
4772 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4773 rhs1_op0, rhs1_op1);
4774 rhs1 = tmp;
4775 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4776 rhs1_type);
4779 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4780 TYPE_VECTOR_SUBPARTS (vectype2)))
4781 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4782 else
4783 tmp = rhs1;
4785 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4786 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4787 gimple_assign_rhs2 (last_stmt),
4788 gimple_assign_rhs3 (last_stmt));
4790 *type_out = vectype1;
4791 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4793 return pattern_stmt;
4796 /* Now check for binary boolean operations requiring conversion for
4797 one of operands. */
4798 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4799 return NULL;
4801 if (rhs_code != BIT_IOR_EXPR
4802 && rhs_code != BIT_XOR_EXPR
4803 && rhs_code != BIT_AND_EXPR
4804 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4805 return NULL;
4807 rhs2 = gimple_assign_rhs2 (last_stmt);
4809 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4810 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4812 if (!rhs1_type || !rhs2_type
4813 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4814 return NULL;
4816 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4818 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4819 if (!vectype1)
4820 return NULL;
4821 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4823 else
4825 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4826 if (!vectype1)
4827 return NULL;
4828 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4831 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4832 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4834 *type_out = vectype1;
4835 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4837 return pattern_stmt;
4840 /* STMT_INFO is a load or store. If the load or store is conditional, return
4841 the boolean condition under which it occurs, otherwise return null. */
4843 static tree
4844 vect_get_load_store_mask (stmt_vec_info stmt_info)
4846 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4848 gcc_assert (gimple_assign_single_p (def_assign));
4849 return NULL_TREE;
4852 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4854 internal_fn ifn = gimple_call_internal_fn (def_call);
4855 int mask_index = internal_fn_mask_index (ifn);
4856 return gimple_call_arg (def_call, mask_index);
4859 gcc_unreachable ();
4862 /* Return MASK if MASK is suitable for masking an operation on vectors
4863 of type VECTYPE, otherwise convert it into such a form and return
4864 the result. Associate any conversion statements with STMT_INFO's
4865 pattern. */
4867 static tree
4868 vect_convert_mask_for_vectype (tree mask, tree vectype,
4869 stmt_vec_info stmt_info, vec_info *vinfo)
4871 tree mask_type = integer_type_for_mask (mask, vinfo);
4872 if (mask_type)
4874 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4875 if (mask_vectype
4876 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4877 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4878 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4880 return mask;
4883 /* Return the equivalent of:
4885 fold_convert (TYPE, VALUE)
4887 with the expectation that the operation will be vectorized.
4888 If new statements are needed, add them as pattern statements
4889 to STMT_INFO. */
4891 static tree
4892 vect_add_conversion_to_pattern (vec_info *vinfo,
4893 tree type, tree value, stmt_vec_info stmt_info)
4895 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4896 return value;
4898 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4899 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4900 append_pattern_def_seq (vinfo, stmt_info, conversion,
4901 get_vectype_for_scalar_type (vinfo, type));
4902 return new_value;
4905 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4906 internal function. Return the final statement on success and set
4907 *TYPE_OUT to the vector type being loaded or stored.
4909 This function only handles gathers and scatters that were recognized
4910 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
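/* As a sketch (hypothetical SSA names, gimple dump syntax): a conditional
   gather such as

     if (m_1)
       x_2 = a[b_3];

   becomes an internal-function call of the form

     x_2 = .MASK_GATHER_LOAD (base, offset, scale, 0, mask);

   with the offset converted to the element width and the mask converted
   to match the loaded vector type.  */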
4912 static gimple *
4913 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4914 stmt_vec_info stmt_info, tree *type_out)
4916 /* Currently we only support this for loop vectorization. */
4917 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4918 if (!loop_vinfo)
4919 return NULL;
4921 /* Make sure that we're looking at a gather load or scatter store. */
4922 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4923 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4924 return NULL;
4926 /* Get the boolean that controls whether the load or store happens.
4927 This is null if the operation is unconditional. */
4928 tree mask = vect_get_load_store_mask (stmt_info);
4930 /* Make sure that the target supports an appropriate internal
4931 function for the gather/scatter operation. */
4932 gather_scatter_info gs_info;
4933 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4934 || gs_info.ifn == IFN_LAST)
4935 return NULL;
4937 /* Convert the mask to the right form. */
4938 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4939 gs_info.element_type);
4940 if (mask)
4941 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4942 loop_vinfo);
4943 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4944 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4945 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4947 /* Get the invariant base and non-invariant offset, converting the
4948 latter to the same width as the vector elements. */
4949 tree base = gs_info.base;
4950 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4951 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4952 gs_info.offset, stmt_info);
4954 /* Build the new pattern statement. */
4955 tree scale = size_int (gs_info.scale);
4956 gcall *pattern_stmt;
4957 if (DR_IS_READ (dr))
4959 tree zero = build_zero_cst (gs_info.element_type);
4960 if (mask != NULL)
4961 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4962 offset, scale, zero, mask);
4963 else
4964 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4965 offset, scale, zero);
4966 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4967 gimple_call_set_lhs (pattern_stmt, load_lhs);
4969 else
4971 tree rhs = vect_get_store_rhs (stmt_info);
4972 if (mask != NULL)
4973 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
4974 base, offset, scale, rhs,
4975 mask);
4976 else
4977 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
4978 base, offset, scale, rhs);
4980 gimple_call_set_nothrow (pattern_stmt, true);
4982 /* Copy across relevant vectorization info and associate DR with the
4983 new pattern statement instead of the original statement. */
4984 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4985 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4987 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4988 *type_out = vectype;
4989 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4991 return pattern_stmt;
4994 /* Return true if TYPE is a non-boolean integer type. These are the types
4995 that we want to consider for narrowing. */
4997 static bool
4998 vect_narrowable_type_p (tree type)
5000 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5003 /* Return true if the operation given by CODE can be truncated to N bits
5004 when only N bits of the output are needed. This is only true if bit N+1
5005 of the inputs has no effect on the low N bits of the result. */
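/* E.g. (a + b) & 0xff depends only on the low 8 bits of A and B, so
   PLUS_EXPR qualifies, whereas (a >> 1) & 0xff also depends on bit 8 of A,
   so shifts and divisions do not.  */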
5007 static bool
5008 vect_truncatable_operation_p (tree_code code)
5010 switch (code)
5012 case PLUS_EXPR:
5013 case MINUS_EXPR:
5014 case MULT_EXPR:
5015 case BIT_AND_EXPR:
5016 case BIT_IOR_EXPR:
5017 case BIT_XOR_EXPR:
5018 case COND_EXPR:
5019 return true;
5021 default:
5022 return false;
5026 /* Record that STMT_INFO could be changed from operating on TYPE to
5027 operating on a type with the precision and sign given by PRECISION
5028 and SIGN respectively. PRECISION is an arbitrary bit precision;
5029 it might not be a whole number of bytes. */
5031 static void
5032 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5033 unsigned int precision, signop sign)
5035 /* Round the precision up to a whole number of bytes. */
5036 precision = vect_element_precision (precision);
5037 if (precision < TYPE_PRECISION (type)
5038 && (!stmt_info->operation_precision
5039 || stmt_info->operation_precision > precision))
5041 stmt_info->operation_precision = precision;
5042 stmt_info->operation_sign = sign;
5046 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5047 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5048 is an arbitrary bit precision; it might not be a whole number of bytes. */
5050 static void
5051 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5052 unsigned int min_input_precision)
5054 /* This operation in isolation only requires the inputs to have
5055 MIN_INPUT_PRECISION of precision. However, that doesn't mean
5056 that MIN_INPUT_PRECISION is a natural precision for the chain
5057 as a whole. E.g. consider something like:
5059 unsigned short *x, *y;
5060 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5062 The right shift can be done on unsigned chars, and only requires the
5063 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5064 approach would mean turning a natural chain of single-vector unsigned
5065 short operations into one that truncates "*x" and then extends
5066 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5067 operation and one vector for each unsigned char operation.
5068 This would be a significant pessimization.
5070 Instead only propagate the maximum of this precision and the precision
5071 required by the users of the result. This means that we don't pessimize
5072 the case above but continue to optimize things like:
5074 unsigned char *y;
5075 unsigned short *x;
5076 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5078 Here we would truncate two vectors of *x to a single vector of
5079 unsigned chars and use single-vector unsigned char operations for
5080 everything else, rather than doing two unsigned short copies of
5081 "(*x & 0xf0) >> 4" and then truncating the result. */
5082 min_input_precision = MAX (min_input_precision,
5083 stmt_info->min_output_precision);
5085 if (min_input_precision < TYPE_PRECISION (type)
5086 && (!stmt_info->min_input_precision
5087 || stmt_info->min_input_precision > min_input_precision))
5088 stmt_info->min_input_precision = min_input_precision;
5091 /* Subroutine of vect_determine_min_output_precision. Return true if
5092 we can calculate a reduced number of output bits for STMT_INFO,
5093 whose result is LHS. */
5095 static bool
5096 vect_determine_min_output_precision_1 (vec_info *vinfo,
5097 stmt_vec_info stmt_info, tree lhs)
5099 /* Take the maximum precision required by users of the result. */
5100 unsigned int precision = 0;
5101 imm_use_iterator iter;
5102 use_operand_p use;
5103 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5105 gimple *use_stmt = USE_STMT (use);
5106 if (is_gimple_debug (use_stmt))
5107 continue;
5108 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5109 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5110 return false;
5111 /* The input precision recorded for COND_EXPRs applies only to the
5112 "then" and "else" values. */
5113 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5114 if (assign
5115 && gimple_assign_rhs_code (assign) == COND_EXPR
5116 && use->use != gimple_assign_rhs2_ptr (assign)
5117 && use->use != gimple_assign_rhs3_ptr (assign))
5118 return false;
5119 precision = MAX (precision, use_stmt_info->min_input_precision);
5122 if (dump_enabled_p ())
5123 dump_printf_loc (MSG_NOTE, vect_location,
5124 "only the low %d bits of %T are significant\n",
5125 precision, lhs);
5126 stmt_info->min_output_precision = precision;
5127 return true;
5130 /* Calculate min_output_precision for STMT_INFO. */
5132 static void
5133 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5135 /* We're only interested in statements with a narrowable result. */
5136 tree lhs = gimple_get_lhs (stmt_info->stmt);
5137 if (!lhs
5138 || TREE_CODE (lhs) != SSA_NAME
5139 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5140 return;
5142 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5143 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5146 /* Use range information to decide whether STMT (described by STMT_INFO)
5147 could be done in a narrower type. This is effectively a forward
5148 propagation, since it uses context-independent information that applies
5149 to all users of an SSA name. */
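/* A sketch: if range information shows that the result of a 32-bit signed
   addition is always within [0, 1000], only 10 bits are really needed, so
   after rounding up the operation can be done in 16-bit unsigned
   arithmetic instead.  */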
5151 static void
5152 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5154 tree lhs = gimple_assign_lhs (stmt);
5155 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5156 return;
5158 tree type = TREE_TYPE (lhs);
5159 if (!vect_narrowable_type_p (type))
5160 return;
5162 /* First see whether we have any useful range information for the result. */
5163 unsigned int precision = TYPE_PRECISION (type);
5164 signop sign = TYPE_SIGN (type);
5165 wide_int min_value, max_value;
5166 if (!vect_get_range_info (lhs, &min_value, &max_value))
5167 return;
5169 tree_code code = gimple_assign_rhs_code (stmt);
5170 unsigned int nops = gimple_num_ops (stmt);
5172 if (!vect_truncatable_operation_p (code))
5173 /* Check that all relevant input operands are compatible, and update
5174 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5175 for (unsigned int i = 1; i < nops; ++i)
5177 tree op = gimple_op (stmt, i);
5178 if (TREE_CODE (op) == INTEGER_CST)
5180 /* Don't require the integer to have RHS_TYPE (which it might
5181 not for things like shift amounts, etc.), but do require it
5182 to fit the type. */
5183 if (!int_fits_type_p (op, type))
5184 return;
5186 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5187 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5189 else if (TREE_CODE (op) == SSA_NAME)
5191 /* Ignore codes that don't take uniform arguments. */
5192 if (!types_compatible_p (TREE_TYPE (op), type))
5193 return;
5195 wide_int op_min_value, op_max_value;
5196 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5197 return;
5199 min_value = wi::min (min_value, op_min_value, sign);
5200 max_value = wi::max (max_value, op_max_value, sign);
5202 else
5203 return;
5206 /* Try to switch signed types for unsigned types if we can.
5207 This is better for two reasons. First, unsigned ops tend
5208 to be cheaper than signed ops. Second, it means that we can
5209 handle things like:
5211 signed char c;
5212 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5214 as:
5216 signed char c;
5217 unsigned short res_1 = (unsigned short) c & 0xff00;
5218 int res = (int) res_1;
5220 where the intermediate result res_1 has unsigned rather than
5221 signed type. */
5222 if (sign == SIGNED && !wi::neg_p (min_value))
5223 sign = UNSIGNED;
5225 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5226 unsigned int precision1 = wi::min_precision (min_value, sign);
5227 unsigned int precision2 = wi::min_precision (max_value, sign);
5228 unsigned int value_precision = MAX (precision1, precision2);
5229 if (value_precision >= precision)
5230 return;
5232 if (dump_enabled_p ())
5233 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5234 " without loss of precision: %G",
5235 sign == SIGNED ? "signed" : "unsigned",
5236 value_precision, stmt);
5238 vect_set_operation_type (stmt_info, type, value_precision, sign);
5239 vect_set_min_input_precision (stmt_info, type, value_precision);
5242 /* Use information about the users of STMT's result to decide whether
5243 STMT (described by STMT_INFO) could be done in a narrower type.
5244 This is effectively a backward propagation. */
5246 static void
5247 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5249 tree_code code = gimple_assign_rhs_code (stmt);
5250 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5251 tree type = TREE_TYPE (gimple_op (stmt, opno));
5252 if (!vect_narrowable_type_p (type))
5253 return;
5255 unsigned int precision = TYPE_PRECISION (type);
5256 unsigned int operation_precision, min_input_precision;
5257 switch (code)
5259 CASE_CONVERT:
5260 /* Only the bits that contribute to the output matter. Don't change
5261 the precision of the operation itself. */
5262 operation_precision = precision;
5263 min_input_precision = stmt_info->min_output_precision;
5264 break;
5266 case LSHIFT_EXPR:
5267 case RSHIFT_EXPR:
5269 tree shift = gimple_assign_rhs2 (stmt);
5270 if (TREE_CODE (shift) != INTEGER_CST
5271 || !wi::ltu_p (wi::to_widest (shift), precision))
5272 return;
5273 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5274 if (code == LSHIFT_EXPR)
5276 /* Avoid creating an undefined shift.
5278 ??? We could instead use min_output_precision as-is and
5279 optimize out-of-range shifts to zero. However, only
5280 degenerate testcases shift away all their useful input data,
5281 and it isn't natural to drop input operations in the middle
5282 of vectorization. This sort of thing should really be
5283 handled before vectorization. */
5284 operation_precision = MAX (stmt_info->min_output_precision,
5285 const_shift + 1);
5286 /* We need CONST_SHIFT fewer bits of the input. */
5287 min_input_precision = (MAX (operation_precision, const_shift)
5288 - const_shift);
5290 else
5292 /* We need CONST_SHIFT extra bits to do the operation. */
5293 operation_precision = (stmt_info->min_output_precision
5294 + const_shift);
5295 min_input_precision = operation_precision;
5297 break;
5300 default:
5301 if (vect_truncatable_operation_p (code))
5303 /* Input bit N has no effect on output bits N-1 and lower. */
5304 operation_precision = stmt_info->min_output_precision;
5305 min_input_precision = operation_precision;
5306 break;
5308 return;
5311 if (operation_precision < precision)
5313 if (dump_enabled_p ())
5314 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5315 " without affecting users: %G",
5316 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5317 operation_precision, stmt);
5318 vect_set_operation_type (stmt_info, type, operation_precision,
5319 TYPE_SIGN (type));
5321 vect_set_min_input_precision (stmt_info, type, min_input_precision);
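/* Editor's note: a hedged illustration of the user-driven (backward)
   narrowing above (assumed example, not part of the original source):

       int a = ..., b = ...;
       unsigned char res = (unsigned char) ((a & b) >> 2);

   only 8 bits of the shift result are consumed, so the shift needs at most
   8 + 2 = 10 bits of input, and the AND feeding it likewise needs only
   10 bits; both operations can therefore be done in a 16-bit type.  */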
5324 /* Return true if the statement described by STMT_INFO sets a boolean
5325 SSA_NAME and if we know how to vectorize this kind of statement using
5326 vector mask types. */
5328 static bool
5329 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5331 tree lhs = gimple_get_lhs (stmt_info->stmt);
5332 if (!lhs
5333 || TREE_CODE (lhs) != SSA_NAME
5334 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5335 return false;
5337 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5339 tree_code rhs_code = gimple_assign_rhs_code (assign);
5340 switch (rhs_code)
5342 CASE_CONVERT:
5343 case SSA_NAME:
5344 case BIT_NOT_EXPR:
5345 case BIT_IOR_EXPR:
5346 case BIT_XOR_EXPR:
5347 case BIT_AND_EXPR:
5348 return true;
5350 default:
5351 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5354 else if (is_a <gphi *> (stmt_info->stmt))
5355 return true;
5356 return false;
5359 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5360 a vector mask type instead of a normal vector type. Record the
5361 result in STMT_INFO->mask_precision. */
5363 static void
5364 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5366 if (!possible_vector_mask_operation_p (stmt_info))
5367 return;
5369 /* If at least one boolean input uses a vector mask type,
5370 pick the mask type with the narrowest elements.
5372 ??? This is the traditional behavior. It should always produce
5373 the smallest number of operations, but isn't necessarily the
5374 optimal choice. For example, if we have:
5376 a = b & c
5378 where:
5380 - the user of a wants it to have a mask type for 16-bit elements (M16)
5381 - b also uses M16
5382 - c uses a mask type for 8-bit elements (M8)
5384 then picking M8 gives:
5386 - 1 M16->M8 pack for b
5387 - 1 M8 AND for a
5388 - 2 M8->M16 unpacks for the user of a
5390 whereas picking M16 would have given:
5392 - 2 M8->M16 unpacks for c
5393 - 2 M16 ANDs for a
5395 The number of operations is the same, but M16 would have given
5396 a shorter dependency chain and allowed more ILP. */
5397 unsigned int precision = ~0U;
5398 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5400 unsigned int nops = gimple_num_ops (assign);
5401 for (unsigned int i = 1; i < nops; ++i)
5403 tree rhs = gimple_op (assign, i);
5404 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5405 continue;
5407 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5408 if (!def_stmt_info)
5409 /* Don't let external or constant operands influence the choice.
5410 We can convert them to whichever vector type we pick. */
5411 continue;
5413 if (def_stmt_info->mask_precision)
5415 if (precision > def_stmt_info->mask_precision)
5416 precision = def_stmt_info->mask_precision;
5420 /* If the statement compares two values that shouldn't use vector masks,
5421 try comparing the values as normal scalars instead. */
5422 tree_code rhs_code = gimple_assign_rhs_code (assign);
5423 if (precision == ~0U
5424 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5426 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5427 scalar_mode mode;
5428 tree vectype, mask_type;
5429 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5430 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5431 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5432 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5433 precision = GET_MODE_BITSIZE (mode);
5436 else
5438 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5439 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5441 tree rhs = gimple_phi_arg_def (phi, i);
5443 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5444 if (!def_stmt_info)
5445 /* Don't let external or constant operands influence the choice.
5446 We can convert them to whichever vector type we pick. */
5447 continue;
5449 if (def_stmt_info->mask_precision)
5451 if (precision > def_stmt_info->mask_precision)
5452 precision = def_stmt_info->mask_precision;
5457 if (dump_enabled_p ())
5459 if (precision == ~0U)
5460 dump_printf_loc (MSG_NOTE, vect_location,
5461 "using normal nonmask vectors for %G",
5462 stmt_info->stmt);
5463 else
5464 dump_printf_loc (MSG_NOTE, vect_location,
5465 "using boolean precision %d for %G",
5466 precision, stmt_info->stmt);
5469 stmt_info->mask_precision = precision;
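/* Editor's note: a hedged illustration of the mask-precision choice above
   (names and types are assumptions; it is also assumed that each comparison
   is performed in the width of its operands):

       _Bool m1 = c8[i] < d8[i];        // 8-bit compare  -> mask precision 8
       _Bool m2 = c32[i] < d32[i];      // 32-bit compare -> mask precision 32
       _Bool m3 = m1 & m2;              // narrowest input wins -> precision 8

   external or constant operands do not constrain the choice, since they can
   be converted to whichever mask type is picked.  */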
5472 /* Handle vect_determine_precisions for STMT_INFO, given that we
5473 have already done so for the users of its result. */
5475 void
5476 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5478 vect_determine_min_output_precision (vinfo, stmt_info);
5479 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5481 vect_determine_precisions_from_range (stmt_info, stmt);
5482 vect_determine_precisions_from_users (stmt_info, stmt);
5486 /* Walk backwards through the vectorizable region to determine the
5487 values of these fields:
5489 - min_output_precision
5490 - min_input_precision
5491 - operation_precision
5492 - operation_sign. */
5494 void
5495 vect_determine_precisions (vec_info *vinfo)
5497 DUMP_VECT_SCOPE ("vect_determine_precisions");
5499 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5501 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5502 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5503 unsigned int nbbs = loop->num_nodes;
5505 for (unsigned int i = 0; i < nbbs; i++)
5507 basic_block bb = bbs[i];
5508 for (auto gsi = gsi_start_phis (bb);
5509 !gsi_end_p (gsi); gsi_next (&gsi))
5511 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5512 if (stmt_info)
5513 vect_determine_mask_precision (vinfo, stmt_info);
5515 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5516 if (!is_gimple_debug (gsi_stmt (si)))
5517 vect_determine_mask_precision
5518 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5520 for (unsigned int i = 0; i < nbbs; i++)
5522 basic_block bb = bbs[nbbs - i - 1];
5523 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5524 !gsi_end_p (si); gsi_prev (&si))
5525 if (!is_gimple_debug (gsi_stmt (si)))
5526 vect_determine_stmt_precisions
5527 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5528 for (auto gsi = gsi_start_phis (bb);
5529 !gsi_end_p (gsi); gsi_next (&gsi))
5531 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5532 if (stmt_info)
5533 vect_determine_stmt_precisions (vinfo, stmt_info);
5537 else
5539 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5540 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5542 basic_block bb = bb_vinfo->bbs[i];
5543 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5545 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5546 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5547 vect_determine_mask_precision (vinfo, stmt_info);
5549 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5551 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5552 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5553 vect_determine_mask_precision (vinfo, stmt_info);
5556 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5558 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5559 !gsi_end_p (gsi); gsi_prev (&gsi))
5561 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5562 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5563 vect_determine_stmt_precisions (vinfo, stmt_info);
5565 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5566 !gsi_end_p (gsi); gsi_next (&gsi))
5568 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5569 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5570 vect_determine_stmt_precisions (vinfo, stmt_info);
5576 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5578 struct vect_recog_func
5580 vect_recog_func_ptr fn;
5581 const char *name;
5584 /* Note that ordering matters - the first pattern matching on a stmt is
5585 taken, which means the more complex patterns usually need to precede the
5586 less complex ones (widen_sum only after dot_prod or sad, for example); see the editor's example after the pattern list below. */
5587 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5588 { vect_recog_over_widening_pattern, "over_widening" },
5589 /* Must come after over_widening, which narrows the shift as much as
5590 possible beforehand. */
5591 { vect_recog_average_pattern, "average" },
5592 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5593 { vect_recog_mulhs_pattern, "mult_high" },
5594 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5595 { vect_recog_widen_mult_pattern, "widen_mult" },
5596 { vect_recog_dot_prod_pattern, "dot_prod" },
5597 { vect_recog_sad_pattern, "sad" },
5598 { vect_recog_widen_sum_pattern, "widen_sum" },
5599 { vect_recog_pow_pattern, "pow" },
5600 { vect_recog_popcount_pattern, "popcount" },
5601 { vect_recog_widen_shift_pattern, "widen_shift" },
5602 { vect_recog_rotate_pattern, "rotate" },
5603 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5604 { vect_recog_divmod_pattern, "divmod" },
5605 { vect_recog_mult_pattern, "mult" },
5606 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5607 { vect_recog_bool_pattern, "bool" },
5608 /* This must come before mask conversion, and includes the parts
5609 of mask conversion that are needed for gather and scatter
5610 internal functions. */
5611 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5612 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5613 { vect_recog_widen_plus_pattern, "widen_plus" },
5614 { vect_recog_widen_minus_pattern, "widen_minus" },
5617 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
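/* Editor's note: a hedged illustration of why the ordering above matters
   (assumed example, not taken from the original source):

       unsigned char *a, *b, *out;
       out[i] = (a[i] + b[i] + 1) >> 1;   // rounding average

   over_widening first narrows the promoted addition and shift back towards
   a short type; only then can the average pattern match the whole
   expression as a single averaging operation.  */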
5619 /* Mark statements that are involved in a pattern. */
5621 void
5622 vect_mark_pattern_stmts (vec_info *vinfo,
5623 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5624 tree pattern_vectype)
5626 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5627 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5629 gimple *orig_pattern_stmt = NULL;
5630 if (is_pattern_stmt_p (orig_stmt_info))
5632 /* We're replacing a statement in an existing pattern definition
5633 sequence. */
5634 orig_pattern_stmt = orig_stmt_info->stmt;
5635 if (dump_enabled_p ())
5636 dump_printf_loc (MSG_NOTE, vect_location,
5637 "replacing earlier pattern %G", orig_pattern_stmt);
5639 /* To keep the book-keeping simple, just swap the lhs of the
5640 old and new statements, so that the old one has a valid but
5641 unused lhs. */
5642 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5643 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5644 gimple_set_lhs (pattern_stmt, old_lhs);
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5649 /* Switch to the statement that ORIG replaces. */
5650 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5652 /* We shouldn't be replacing the main pattern statement. */
5653 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5654 != orig_pattern_stmt);
5657 if (def_seq)
5658 for (gimple_stmt_iterator si = gsi_start (def_seq);
5659 !gsi_end_p (si); gsi_next (&si))
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_NOTE, vect_location,
5663 "extra pattern stmt: %G", gsi_stmt (si));
5664 stmt_vec_info pattern_stmt_info
5665 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5666 orig_stmt_info, pattern_vectype);
5667 /* Stmts in the def sequence are not vectorizable cycle or
5668 induction defs, instead they should all be vect_internal_def
5669 feeding the main pattern stmt which retains this def type. */
5670 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5673 if (orig_pattern_stmt)
5675 vect_init_pattern_stmt (vinfo, pattern_stmt,
5676 orig_stmt_info, pattern_vectype);
5678 /* Insert all the new pattern statements before the original one. */
5679 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5680 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5681 orig_def_seq);
5682 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5683 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5685 /* Remove the pattern statement that this new pattern replaces. */
5686 gsi_remove (&gsi, false);
5688 else
5689 vect_set_pattern_stmt (vinfo,
5690 pattern_stmt, orig_stmt_info, pattern_vectype);
5692 /* Transfer reduction path info to the pattern. */
5693 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5695 gimple_match_op op;
5696 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
5697 gcc_unreachable ();
5698 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
5699 /* Search the pattern def sequence and the main pattern stmt. Note
5700 we may have inserted it all into a containing pattern def sequence
5701 so the following is a bit awkward. */
5702 gimple_stmt_iterator si;
5703 gimple *s;
5704 if (def_seq)
5706 si = gsi_start (def_seq);
5707 s = gsi_stmt (si);
5708 gsi_next (&si);
5710 else
5712 si = gsi_none ();
5713 s = pattern_stmt;
5715 do
5717 bool found = false;
5718 if (gimple_extract_op (s, &op))
5719 for (unsigned i = 0; i < op.num_ops; ++i)
5720 if (op.ops[i] == lookfor)
5722 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
5723 lookfor = gimple_get_lhs (s);
5724 found = true;
5725 break;
5727 if (s == pattern_stmt)
5729 if (!found && dump_enabled_p ())
5730 dump_printf_loc (MSG_NOTE, vect_location,
5731 "failed to update reduction index.\n");
5732 break;
5734 if (gsi_end_p (si))
5735 s = pattern_stmt;
5736 else
5738 s = gsi_stmt (si);
5739 if (s == pattern_stmt)
5740 /* Found the end inside a bigger pattern def seq. */
5741 si = gsi_none ();
5742 else
5743 gsi_next (&si);
5745 } while (1);
5749 /* Function vect_pattern_recog_1
5751 Input:
5752 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5753 computation pattern.
5754 STMT_INFO: A stmt from which the pattern search should start.
5756 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5757 a sequence of statements that has the same functionality and can be
5758 used to replace STMT_INFO. It returns the last statement in the sequence
5759 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5760 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5761 statement, having first checked that the target supports the new operation
5762 in that type.
5764 This function also does some bookkeeping, as explained in the documentation
5765 for vect_pattern_recog. */
5767 static void
5768 vect_pattern_recog_1 (vec_info *vinfo,
5769 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5771 gimple *pattern_stmt;
5772 loop_vec_info loop_vinfo;
5773 tree pattern_vectype;
5775 /* If this statement has already been replaced with pattern statements,
5776 leave the original statement alone, since the first match wins.
5777 Instead try to match against the definition statements that feed
5778 the main pattern statement. */
5779 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5781 gimple_stmt_iterator gsi;
5782 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5783 !gsi_end_p (gsi); gsi_next (&gsi))
5784 vect_pattern_recog_1 (vinfo, recog_func,
5785 vinfo->lookup_stmt (gsi_stmt (gsi)));
5786 return;
5789 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5790 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5791 if (!pattern_stmt)
5793 /* Clear any half-formed pattern definition sequence. */
5794 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5795 return;
5798 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5800 /* Found a vectorizable pattern. */
5801 if (dump_enabled_p ())
5802 dump_printf_loc (MSG_NOTE, vect_location,
5803 "%s pattern recognized: %G",
5804 recog_func->name, pattern_stmt);
5806 /* Mark the stmts that are involved in the pattern. */
5807 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5809 /* Patterns cannot be vectorized using SLP, because they change the order of
5810 computation. */
5811 if (loop_vinfo)
5813 unsigned ix, ix2;
5814 stmt_vec_info *elem_ptr;
5815 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5816 elem_ptr, *elem_ptr == stmt_info);
5821 /* Function vect_pattern_recog
5823 Input:
5824 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5825 computation idioms.
5827 Output - for each computation idiom that is detected we create a new stmt
5828 that provides the same functionality and that can be vectorized. We
5829 also record some information in the struct_stmt_info of the relevant
5830 stmts, as explained below:
5832 At the entry to this function we have the following stmts, with the
5833 following initial value in the STMT_VINFO fields:
5835 stmt in_pattern_p related_stmt vec_stmt
5836 S1: a_i = .... - - -
5837 S2: a_2 = ..use(a_i).. - - -
5838 S3: a_1 = ..use(a_2).. - - -
5839 S4: a_0 = ..use(a_1).. - - -
5840 S5: ... = ..use(a_0).. - - -
5842 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5843 represented by a single stmt. We then:
5844 - create a new stmt S6 equivalent to the pattern (the stmt is not
5845 inserted into the code)
5846 - fill in the STMT_VINFO fields as follows:
5848 in_pattern_p related_stmt vec_stmt
5849 S1: a_i = .... - - -
5850 S2: a_2 = ..use(a_i).. - - -
5851 S3: a_1 = ..use(a_2).. - - -
5852 S4: a_0 = ..use(a_1).. true S6 -
5853 '---> S6: a_new = .... - S4 -
5854 S5: ... = ..use(a_0).. - - -
5856 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5857 to each other through the RELATED_STMT field).
5859 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5860 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5861 remain irrelevant unless used by stmts other than S4.
5863 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5864 (because they are marked as irrelevant). It will vectorize S6, and record
5865 a pointer to the new vector stmt VS6 from S6 (as usual).
5866 S4 will be skipped, and S5 will be vectorized as usual:
5868 in_pattern_p related_stmt vec_stmt
5869 S1: a_i = .... - - -
5870 S2: a_2 = ..use(a_i).. - - -
5871 S3: a_1 = ..use(a_2).. - - -
5872 > VS6: va_new = .... - - -
5873 S4: a_0 = ..use(a_1).. true S6 VS6
5874 '---> S6: a_new = .... - S4 VS6
5875 > VS5: ... = ..vuse(va_new).. - - -
5876 S5: ... = ..use(a_0).. - - -
5878 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5879 elsewhere), and we'll end up with:
5881 VS6: va_new = ....
5882 VS5: ... = ..vuse(va_new)..
5884 In the case of more than one pattern statement, e.g., widen-mult with
5885 intermediate type:
5887 S1 a_t = ;
5888 S2 a_T = (TYPE) a_t;
5889 '--> S3: a_it = (interm_type) a_t;
5890 S4 prod_T = a_T * CONST;
5891 '--> S5: prod_T' = a_it w* CONST;
5893 there may be other users of a_T outside the pattern. In that case S2 will
5894 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5895 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5896 be recorded in S3. */
5898 void
5899 vect_pattern_recog (vec_info *vinfo)
5901 class loop *loop;
5902 basic_block *bbs;
5903 unsigned int nbbs;
5904 gimple_stmt_iterator si;
5905 unsigned int i, j;
5907 vect_determine_precisions (vinfo);
5909 DUMP_VECT_SCOPE ("vect_pattern_recog");
5911 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5913 loop = LOOP_VINFO_LOOP (loop_vinfo);
5914 bbs = LOOP_VINFO_BBS (loop_vinfo);
5915 nbbs = loop->num_nodes;
5917 /* Scan through the loop stmts, applying the pattern recognition
5918 functions starting at each stmt visited: */
5919 for (i = 0; i < nbbs; i++)
5921 basic_block bb = bbs[i];
5922 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5924 if (is_gimple_debug (gsi_stmt (si)))
5925 continue;
5926 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5927 /* Scan over all generic vect_recog_xxx_pattern functions. */
5928 for (j = 0; j < NUM_PATTERNS; j++)
5929 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5930 stmt_info);
5934 else
5936 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5937 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5938 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5939 !gsi_end_p (gsi); gsi_next (&gsi))
5941 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5942 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5943 continue;
5945 /* Scan over all generic vect_recog_xxx_pattern functions. */
5946 for (j = 0; j < NUM_PATTERNS; j++)
5947 vect_pattern_recog_1 (vinfo,
5948 &vect_vect_recog_func_ptrs[j], stmt_info);
5952 /* After this no more add_stmt calls are allowed. */
5953 vinfo->stmt_vec_info_ro = true;
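/* Editor's note: a hedged source-level illustration of the multi-statement
   case described above (widen-mult with an intermediate type; the concrete
   types and constant are assumptions):

       short a_t = ...;
       long long a_T = a_t;            // S2: may also have uses outside the pattern
       long long prod_T = a_T * 17;    // S4

   the recognizer introduces a conversion of a_t to an intermediate type (S3)
   and rewrites the multiplication as a widening multiply on that value (S5);
   if a_T has users outside the pattern, S2 stays relevant and is vectorized
   together with S3.  */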