gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
54 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
56 static bool
57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
59 value_range vr;
60 get_range_query (cfun)->range_of_expr (vr, var);
61 if (vr.undefined_p ())
62 vr.set_varying (TREE_TYPE (var));
63 *min_value = wi::to_wide (vr.min ());
64 *max_value = wi::to_wide (vr.max ());
65 value_range_kind vr_type = vr.kind ();
66 wide_int nonzero = get_nonzero_bits (var);
67 signop sgn = TYPE_SIGN (TREE_TYPE (var));
68 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
69 nonzero, sgn) == VR_RANGE)
71 if (dump_enabled_p ())
73 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
74 dump_printf (MSG_NOTE, " has range [");
75 dump_hex (MSG_NOTE, *min_value);
76 dump_printf (MSG_NOTE, ", ");
77 dump_hex (MSG_NOTE, *max_value);
78 dump_printf (MSG_NOTE, "]\n");
80 return true;
82 else
84 if (dump_enabled_p ())
86 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 dump_printf (MSG_NOTE, " has no range info\n");
89 return false;
93 /* Report that we've found an instance of pattern PATTERN in
94 statement STMT. */
96 static void
97 vect_pattern_detected (const char *name, gimple *stmt)
99 if (dump_enabled_p ())
100 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104 return the pattern statement's stmt_vec_info. Set its vector type to
105 VECTYPE if it doesn't have one already. */
107 static stmt_vec_info
108 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
109 stmt_vec_info orig_stmt_info, tree vectype)
111 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
112 if (pattern_stmt_info == NULL)
113 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
114 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
116 pattern_stmt_info->pattern_stmt_p = true;
117 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
118 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
119 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
120 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
122 gcc_assert (!vectype
123 || (VECTOR_BOOLEAN_TYPE_P (vectype)
124 == vect_use_mask_type_p (orig_stmt_info)));
125 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
126 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
128 return pattern_stmt_info;
131 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
132 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
133 have one already. */
135 static void
136 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
137 stmt_vec_info orig_stmt_info, tree vectype)
139 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
140 STMT_VINFO_RELATED_STMT (orig_stmt_info)
141 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
144 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
145 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
146 be different from the vector type of the final pattern statement.
147 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
148 from which it was derived. */
150 static inline void
151 append_pattern_def_seq (vec_info *vinfo,
152 stmt_vec_info stmt_info, gimple *new_stmt,
153 tree vectype = NULL_TREE,
154 tree scalar_type_for_mask = NULL_TREE)
156 gcc_assert (!scalar_type_for_mask
157 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
158 if (vectype)
160 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
161 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
162 if (scalar_type_for_mask)
163 new_stmt_info->mask_precision
164 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
166 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
167 new_stmt);
170 /* The caller wants to perform new operations on vect_external variable
171 VAR, so that the result of the operations would also be vect_external.
172 Return the edge on which the operations can be performed, if one exists.
173 Return null if the operations should instead be treated as part of
174 the pattern that needs them. */
176 static edge
177 vect_get_external_def_edge (vec_info *vinfo, tree var)
179 edge e = NULL;
180 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
182 e = loop_preheader_edge (loop_vinfo->loop);
183 if (!SSA_NAME_IS_DEFAULT_DEF (var))
185 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
186 if (bb == NULL
187 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
188 e = NULL;
191 return e;
194 /* Return true if the target supports a vector version of CODE,
195 where CODE is known to map to a direct optab with the given SUBTYPE.
196 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
197 specifies the type of the scalar result.
199 If CODE allows the inputs and outputs to have different types
200 (such as for WIDEN_SUM_EXPR), it is the input mode rather
201 than the output mode that determines the appropriate target pattern.
202 Operand 0 of the target pattern then specifies the mode that the output
203 must have.
205 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
206 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
207 is nonnull. */
209 static bool
210 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
211 tree itype, tree *vecotype_out,
212 tree *vecitype_out = NULL,
213 enum optab_subtype subtype = optab_default)
215 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
216 if (!vecitype)
217 return false;
219 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
220 if (!vecotype)
221 return false;
223 optab optab = optab_for_tree_code (code, vecitype, subtype);
224 if (!optab)
225 return false;
227 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
228 if (icode == CODE_FOR_nothing
229 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
230 return false;
232 *vecotype_out = vecotype;
233 if (vecitype_out)
234 *vecitype_out = vecitype;
235 return true;
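/* As a usage sketch (drawn from the dot-product recognizer later in this
   file, so the names below are that recognizer's locals, not new API):

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR,
					   half_type, type_out,
					   &half_vectype, subtype))
       return NULL;

   i.e. TYPE is the scalar result type, HALF_TYPE the narrower input type,
   and on success *TYPE_OUT and HALF_VECTYPE hold the vector types.  */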
238 /* Round bit precision PRECISION up to a full element. */
240 static unsigned int
241 vect_element_precision (unsigned int precision)
243 precision = 1 << ceil_log2 (precision);
244 return MAX (precision, BITS_PER_UNIT);
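/* For instance (illustrative values, assuming BITS_PER_UNIT is 8):
   vect_element_precision (1) == 8, vect_element_precision (9) == 16
   and vect_element_precision (17) == 32.  */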
247 /* If OP is defined by a statement that's being considered for vectorization,
248 return information about that statement, otherwise return NULL. */
250 static stmt_vec_info
251 vect_get_internal_def (vec_info *vinfo, tree op)
253 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
254 if (def_stmt_info
255 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
256 return def_stmt_info;
257 return NULL;
260 /* Check whether NAME, an ssa-name used in STMT_VINFO,
261 is a result of a type promotion, such that:
262 DEF_STMT: NAME = NOP (name0)
263 If CHECK_SIGN is TRUE, check that either both types are signed or both are
264 unsigned. */
266 static bool
267 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
268 tree *orig_type, gimple **def_stmt, bool *promotion)
270 tree type = TREE_TYPE (name);
271 tree oprnd0;
272 enum vect_def_type dt;
274 stmt_vec_info def_stmt_info;
275 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
276 return false;
278 if (dt != vect_internal_def
279 && dt != vect_external_def && dt != vect_constant_def)
280 return false;
282 if (!*def_stmt)
283 return false;
285 if (!is_gimple_assign (*def_stmt))
286 return false;
288 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
289 return false;
291 oprnd0 = gimple_assign_rhs1 (*def_stmt);
293 *orig_type = TREE_TYPE (oprnd0);
294 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
295 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
296 return false;
298 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
299 *promotion = true;
300 else
301 *promotion = false;
303 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
304 return false;
306 return true;
309 /* Holds information about an input operand after some sign changes
310 and type promotions have been peeled away. */
311 class vect_unpromoted_value {
312 public:
313 vect_unpromoted_value ();
315 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
317 /* The value obtained after peeling away zero or more casts. */
318 tree op;
320 /* The type of OP. */
321 tree type;
323 /* The definition type of OP. */
324 vect_def_type dt;
326 /* If OP is the result of peeling at least one cast, and if the cast
327 of OP itself is a vectorizable statement, CASTER identifies that
328 statement, otherwise it is null. */
329 stmt_vec_info caster;
332 inline vect_unpromoted_value::vect_unpromoted_value ()
333 : op (NULL_TREE),
334 type (NULL_TREE),
335 dt (vect_uninitialized_def),
336 caster (NULL)
340 /* Set the operand to OP_IN, its definition type to DT_IN, and the
341 statement that casts it to CASTER_IN. */
343 inline void
344 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
345 stmt_vec_info caster_in)
347 op = op_in;
348 type = TREE_TYPE (op);
349 dt = dt_in;
350 caster = caster_in;
353 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
354 to reach some vectorizable inner operand OP', continuing as long as it
355 is possible to convert OP' back to OP using a possible sign change
356 followed by a possible promotion P. Return this OP', or null if OP is
357 not a vectorizable SSA name. If there is a promotion P, describe its
358 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
359 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
360 have more than one user.
362 A successful return means that it is possible to go from OP' to OP
363 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
364 whereas the cast from UNPROM to OP might be a promotion, a sign
365 change, or a nop.
367 E.g. say we have:
369 signed short *ptr = ...;
370 signed short C = *ptr;
371 unsigned short B = (unsigned short) C; // sign change
372 signed int A = (signed int) B; // unsigned promotion
373 ...possible other uses of A...
374 unsigned int OP = (unsigned int) A; // sign change
376 In this case it's possible to go directly from C to OP using:
378 OP = (unsigned int) (unsigned short) C;
379 +------------+ +--------------+
380 promotion sign change
382 so OP' would be C. The input to the promotion is B, so UNPROM
383 would describe B. */
385 static tree
386 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
387 vect_unpromoted_value *unprom,
388 bool *single_use_p = NULL)
390 tree res = NULL_TREE;
391 tree op_type = TREE_TYPE (op);
392 unsigned int orig_precision = TYPE_PRECISION (op_type);
393 unsigned int min_precision = orig_precision;
394 stmt_vec_info caster = NULL;
395 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
397 /* See whether OP is simple enough to vectorize. */
398 stmt_vec_info def_stmt_info;
399 gimple *def_stmt;
400 vect_def_type dt;
401 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
402 break;
404 /* If OP is the input of a demotion, skip over it to see whether
405 OP is itself the result of a promotion. If so, the combined
406 effect of the promotion and the demotion might fit the required
407 pattern, otherwise neither operation fits.
409 This copes with cases such as the result of an arithmetic
410 operation being truncated before being stored, and where that
411 arithmetic operation has been recognized as an over-widened one. */
412 if (TYPE_PRECISION (op_type) <= min_precision)
414 /* Use OP as the UNPROM described above if we haven't yet
415 found a promotion, or if using the new input preserves the
416 sign of the previous promotion. */
417 if (!res
418 || TYPE_PRECISION (unprom->type) == orig_precision
419 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
421 unprom->set_op (op, dt, caster);
422 min_precision = TYPE_PRECISION (op_type);
424 /* Stop if we've already seen a promotion and if this
425 conversion does more than change the sign. */
426 else if (TYPE_PRECISION (op_type)
427 != TYPE_PRECISION (unprom->type))
428 break;
430 /* The sequence now extends to OP. */
431 res = op;
434 /* See whether OP is defined by a cast. Record it as CASTER if
435 the cast is potentially vectorizable. */
436 if (!def_stmt)
437 break;
438 caster = def_stmt_info;
440 /* Ignore pattern statements, since we don't link uses for them. */
441 if (caster
442 && single_use_p
443 && !STMT_VINFO_RELATED_STMT (caster)
444 && !has_single_use (res))
445 *single_use_p = false;
447 gassign *assign = dyn_cast <gassign *> (def_stmt);
448 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
449 break;
451 /* Continue with the input to the cast. */
452 op = gimple_assign_rhs1 (def_stmt);
453 op_type = TREE_TYPE (op);
455 return res;
458 /* OP is an integer operand to an operation that returns TYPE, and we
459 want to treat the operation as a widening one. So far we can treat
460 it as widening from *COMMON_TYPE.
462 Return true if OP is suitable for such a widening operation,
463 either widening from *COMMON_TYPE or from some supertype of it.
464 Update *COMMON_TYPE to the supertype in the latter case.
466 SHIFT_P is true if OP is a shift amount. */
468 static bool
469 vect_joust_widened_integer (tree type, bool shift_p, tree op,
470 tree *common_type)
472 /* Calculate the minimum precision required by OP, without changing
473 the sign of either operand. */
474 unsigned int precision;
475 if (shift_p)
477 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
478 return false;
479 precision = TREE_INT_CST_LOW (op);
481 else
483 precision = wi::min_precision (wi::to_widest (op),
484 TYPE_SIGN (*common_type));
485 if (precision * 2 > TYPE_PRECISION (type))
486 return false;
489 /* If OP requires a wider type, switch to that type. The checks
490 above ensure that this is still narrower than the result. */
491 precision = vect_element_precision (precision);
492 if (TYPE_PRECISION (*common_type) < precision)
493 *common_type = build_nonstandard_integer_type
494 (precision, TYPE_UNSIGNED (*common_type));
495 return true;
498 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
499 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
501 static bool
502 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
504 if (types_compatible_p (*common_type, new_type))
505 return true;
507 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
508 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
509 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
510 return true;
512 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
513 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
514 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
516 *common_type = new_type;
517 return true;
520 /* We have mismatched signs, with the signed type being
521 no wider than the unsigned type. In this case we need
522 a wider signed type. */
523 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
524 TYPE_PRECISION (new_type));
525 precision *= 2;
527 if (precision * 2 > TYPE_PRECISION (type))
528 return false;
530 *common_type = build_nonstandard_integer_type (precision, false);
531 return true;
534 /* Check whether STMT_INFO can be viewed as a tree of integer operations
535 in which each node either performs CODE or WIDENED_CODE, and where
536 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
537 specifies the maximum number of leaf operands. SHIFT_P says whether
538 CODE and WIDENED_CODE are some sort of shift.
540 If STMT_INFO is such a tree, return the number of leaf operands
541 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
542 to a type that (a) is narrower than the result of STMT_INFO and
543 (b) can hold all leaf operand values.
545 If SUBTYPE is nonnull, allow the signs of the operands to differ
546 but not their precisions. SUBTYPE is updated to reflect
547 this.
549 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
550 exists. */
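/* As an illustrative sketch (not taken from the original sources): with
   signed char inputs a, b, c and d promoted to short and summed as

     t1 = (short) a + (short) b;
     t2 = (short) c + (short) d;
     res = t1 + t2;

   calling this function on RES's statement with CODE = PLUS_EXPR,
   WIDENED_CODE = WIDEN_PLUS_EXPR and MAX_NOPS = 4 would return 4,
   describe a, b, c and d in UNPROM[0..3] and set *COMMON_TYPE to
   signed char.  */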
552 static unsigned int
553 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
554 tree_code widened_code, bool shift_p,
555 unsigned int max_nops,
556 vect_unpromoted_value *unprom, tree *common_type,
557 enum optab_subtype *subtype = NULL)
559 /* Check for an integer operation with the right code. */
560 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
561 if (!assign)
562 return 0;
564 tree_code rhs_code = gimple_assign_rhs_code (assign);
565 if (rhs_code != code && rhs_code != widened_code)
566 return 0;
568 tree type = TREE_TYPE (gimple_assign_lhs (assign));
569 if (!INTEGRAL_TYPE_P (type))
570 return 0;
572 /* Assume that both operands will be leaf operands. */
573 max_nops -= 2;
575 /* Check the operands. */
576 unsigned int next_op = 0;
577 for (unsigned int i = 0; i < 2; ++i)
579 vect_unpromoted_value *this_unprom = &unprom[next_op];
580 unsigned int nops = 1;
581 tree op = gimple_op (assign, i + 1);
582 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
584 /* We already have a common type from earlier operands.
585 Update it to account for OP. */
586 this_unprom->set_op (op, vect_constant_def);
587 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
588 return 0;
590 else
592 /* Only allow shifts by constants. */
593 if (shift_p && i == 1)
594 return 0;
596 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
597 return 0;
599 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
601 /* The operand isn't widened. If STMT_INFO has the code
602 for an unwidened operation, recursively check whether
603 this operand is a node of the tree. */
604 if (rhs_code != code
605 || max_nops == 0
606 || this_unprom->dt != vect_internal_def)
607 return 0;
609 /* Give back the leaf slot allocated above now that we're
610 not treating this as a leaf operand. */
611 max_nops += 1;
613 /* Recursively process the definition of the operand. */
614 stmt_vec_info def_stmt_info
615 = vinfo->lookup_def (this_unprom->op);
616 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
617 widened_code, shift_p, max_nops,
618 this_unprom, common_type,
619 subtype);
620 if (nops == 0)
621 return 0;
623 max_nops -= nops;
625 else
627 /* Make sure that the operand is narrower than the result. */
628 if (TYPE_PRECISION (this_unprom->type) * 2
629 > TYPE_PRECISION (type))
630 return 0;
632 /* Update COMMON_TYPE for the new operand. */
633 if (i == 0)
634 *common_type = this_unprom->type;
635 else if (!vect_joust_widened_type (type, this_unprom->type,
636 common_type))
638 if (subtype)
640 /* See if we can sign extend the smaller type. */
641 if (TYPE_PRECISION (this_unprom->type)
642 > TYPE_PRECISION (*common_type))
643 *common_type = this_unprom->type;
644 *subtype = optab_vector_mixed_sign;
646 else
647 return 0;
651 next_op += nops;
653 return next_op;
656 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
657 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
659 static tree
660 vect_recog_temp_ssa_var (tree type, gimple *stmt)
662 return make_temp_ssa_name (type, stmt, "patt");
665 /* STMT2_INFO describes a type conversion that could be split into STMT1
666 followed by a version of STMT2_INFO that takes NEW_RHS as its first
667 input. Try to do this using pattern statements, returning true on
668 success. */
670 static bool
671 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
672 gimple *stmt1, tree vectype)
674 if (is_pattern_stmt_p (stmt2_info))
676 /* STMT2_INFO is part of a pattern. Get the statement to which
677 the pattern is attached. */
678 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
679 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE, vect_location,
683 "Splitting pattern statement: %G", stmt2_info->stmt);
685 /* Since STMT2_INFO is a pattern statement, we can change it
686 in-situ without worrying about changing the code for the
687 containing block. */
688 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
690 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
693 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
694 stmt2_info->stmt);
697 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
698 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
699 /* STMT2_INFO is the actual pattern statement. Add STMT1
700 to the end of the definition sequence. */
701 gimple_seq_add_stmt_without_update (def_seq, stmt1);
702 else
704 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
705 before it. */
706 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
707 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
709 return true;
711 else
713 /* STMT2_INFO doesn't yet have a pattern. Try to create a
714 two-statement pattern now. */
715 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
716 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
717 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
718 if (!lhs_vectype)
719 return false;
721 if (dump_enabled_p ())
722 dump_printf_loc (MSG_NOTE, vect_location,
723 "Splitting statement: %G", stmt2_info->stmt);
725 /* Add STMT1 as a singleton pattern definition sequence. */
726 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
727 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
728 gimple_seq_add_stmt_without_update (def_seq, stmt1);
730 /* Build the second of the two pattern statements. */
731 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
732 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
733 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
735 if (dump_enabled_p ())
737 dump_printf_loc (MSG_NOTE, vect_location,
738 "into pattern statements: %G", stmt1);
739 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
742 return true;
746 /* Convert UNPROM to TYPE and return the result, adding new statements
747 to STMT_INFO's pattern definition statements if no better way is
748 available. VECTYPE is the vector form of TYPE.
750 If SUBTYPE then convert the type based on the subtype. */
752 static tree
753 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
754 vect_unpromoted_value *unprom, tree vectype,
755 enum optab_subtype subtype = optab_default)
758 /* Update the type if the signs differ. */
759 if (subtype == optab_vector_mixed_sign
760 && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
761 type = build_nonstandard_integer_type (TYPE_PRECISION (type),
762 TYPE_SIGN (unprom->type));
764 /* Check for a no-op conversion. */
765 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
766 return unprom->op;
768 /* Allow the caller to create constant vect_unpromoted_values. */
769 if (TREE_CODE (unprom->op) == INTEGER_CST)
770 return wide_int_to_tree (type, wi::to_widest (unprom->op));
772 tree input = unprom->op;
773 if (unprom->caster)
775 tree lhs = gimple_get_lhs (unprom->caster->stmt);
776 tree lhs_type = TREE_TYPE (lhs);
778 /* If the result of the existing cast is the right width, use it
779 instead of the source of the cast. */
780 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
781 input = lhs;
782 /* If the precision we want is between the source and result
783 precisions of the existing cast, try splitting the cast into
784 two and tapping into a mid-way point. */
785 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
786 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
788 /* In order to preserve the semantics of the original cast,
789 give the mid-way point the same signedness as the input value.
791 It would be possible to use a signed type here instead if
792 TYPE is signed and UNPROM->TYPE is unsigned, but that would
793 make the sign of the midtype sensitive to the order in
794 which we process the statements, since the signedness of
795 TYPE is the signedness required by just one of possibly
796 many users. Also, unsigned promotions are usually as cheap
797 as or cheaper than signed ones, so it's better to keep an
798 unsigned promotion. */
799 tree midtype = build_nonstandard_integer_type
800 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
801 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
802 if (vec_midtype)
804 input = vect_recog_temp_ssa_var (midtype, NULL);
805 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
806 unprom->op);
807 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
808 vec_midtype))
809 append_pattern_def_seq (vinfo, stmt_info,
810 new_stmt, vec_midtype);
814 /* See if we can reuse an existing result. */
815 if (types_compatible_p (type, TREE_TYPE (input)))
816 return input;
819 /* We need a new conversion statement. */
820 tree new_op = vect_recog_temp_ssa_var (type, NULL);
821 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
823 /* If OP is an external value, see if we can insert the new statement
824 on an incoming edge. */
825 if (input == unprom->op && unprom->dt == vect_external_def)
826 if (edge e = vect_get_external_def_edge (vinfo, input))
828 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
829 gcc_assert (!new_bb);
830 return new_op;
833 /* As a (common) last resort, add the statement to the pattern itself. */
834 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
835 return new_op;
838 /* Invoke vect_convert_input for N elements of UNPROM and store the
839 result in the corresponding elements of RESULT.
841 If SUBTYPE then convert the type based on the subtype. */
843 static void
844 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
845 tree *result, tree type, vect_unpromoted_value *unprom,
846 tree vectype, enum optab_subtype subtype = optab_default)
848 for (unsigned int i = 0; i < n; ++i)
850 unsigned int j;
851 for (j = 0; j < i; ++j)
852 if (unprom[j].op == unprom[i].op)
853 break;
855 if (j < i)
856 result[i] = result[j];
857 else
858 result[i] = vect_convert_input (vinfo, stmt_info,
859 type, &unprom[i], vectype, subtype);
863 /* The caller has created a (possibly empty) sequence of pattern definition
864 statements followed by a single statement PATTERN_STMT. Cast the result
865 of this final statement to TYPE. If a new statement is needed, add
866 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
867 and return the new statement, otherwise return PATTERN_STMT as-is.
868 VECITYPE is the vector form of PATTERN_STMT's result type. */
870 static gimple *
871 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
872 gimple *pattern_stmt, tree vecitype)
874 tree lhs = gimple_get_lhs (pattern_stmt);
875 if (!types_compatible_p (type, TREE_TYPE (lhs)))
877 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
878 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
879 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
881 return pattern_stmt;
884 /* Return true if STMT_VINFO describes a reduction for which reassociation
885 is allowed. If STMT_INFO is part of a group, assume that it's part of
886 a reduction chain and optimistically assume that all statements
887 except the last allow reassociation.
888 Also require it to have code CODE and to be a reduction
889 in the outermost loop. When returning true, store the operands in
890 *OP0_OUT and *OP1_OUT. */
892 static bool
893 vect_reassociating_reduction_p (vec_info *vinfo,
894 stmt_vec_info stmt_info, tree_code code,
895 tree *op0_out, tree *op1_out)
897 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
898 if (!loop_info)
899 return false;
901 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
902 if (!assign || gimple_assign_rhs_code (assign) != code)
903 return false;
905 /* We don't allow changing the order of the computation in the inner-loop
906 when doing outer-loop vectorization. */
907 class loop *loop = LOOP_VINFO_LOOP (loop_info);
908 if (loop && nested_in_vect_loop_p (loop, stmt_info))
909 return false;
911 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
913 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
914 code))
915 return false;
917 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
918 return false;
920 *op0_out = gimple_assign_rhs1 (assign);
921 *op1_out = gimple_assign_rhs2 (assign);
922 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
923 std::swap (*op0_out, *op1_out);
924 return true;
927 /* Function vect_recog_dot_prod_pattern
929 Try to find the following pattern:
931 type1a x_t;
932 type1b y_t;
933 TYPE1 prod;
934 TYPE2 sum = init;
935 loop:
936 sum_0 = phi <init, sum_1>
937 S1 x_t = ...
938 S2 y_t = ...
939 S3 x_T = (TYPE1) x_t;
940 S4 y_T = (TYPE1) y_t;
941 S5 prod = x_T * y_T;
942 [S6 prod = (TYPE2) prod; #optional]
943 S7 sum_1 = prod + sum_0;
945 where 'TYPE1' is exactly double the size of types 'type1a' and 'type1b',
946 the sign of 'TYPE1' must match that of either 'type1a' or 'type1b', but
947 the signs of 'type1a' and 'type1b' themselves can differ.
949 Input:
951 * STMT_VINFO: The stmt from which the pattern search begins. In the
952 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
953 will be detected.
955 Output:
957 * TYPE_OUT: The type of the output of this pattern.
959 * Return value: A new stmt that will be used to replace the sequence of
960 stmts that constitute the pattern. In this case it will be:
961 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
963 Note: The dot-prod idiom is a widening reduction pattern that is
964 vectorized without preserving all the intermediate results. It
965 produces only N/2 (widened) results (by summing up pairs of
966 intermediate results) rather than all N results. Therefore, we
967 cannot allow this pattern when we want to get all the results and in
968 the correct order (as is the case when this computation is in an
969 inner-loop nested in an outer-loop that is being vectorized). */
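/* As an illustrative source-level example (not part of the original
   sources), a reduction loop such as:

     int
     dot_prod (signed char *x, signed char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 {
	   short prod = x[i] * y[i];
	   sum += prod;
	 }
       return sum;
     }

   matches the pattern above with type1a = type1b = signed char, an
   intermediate TYPE1 of short and TYPE2 = int, and is replaced by a
   DOT_PROD_EXPR reduction when the target provides a suitable
   dot-product optab.  */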
971 static gimple *
972 vect_recog_dot_prod_pattern (vec_info *vinfo,
973 stmt_vec_info stmt_vinfo, tree *type_out)
975 tree oprnd0, oprnd1;
976 gimple *last_stmt = stmt_vinfo->stmt;
977 tree type, half_type;
978 gimple *pattern_stmt;
979 tree var;
981 /* Look for the following pattern
982 DX = (TYPE1) X;
983 DY = (TYPE1) Y;
984 DPROD = DX * DY;
985 DDPROD = (TYPE2) DPROD;
986 sum_1 = DDPROD + sum_0;
987 In which
988 - DX is double the size of X
989 - DY is double the size of Y
990 - DX, DY, DPROD all have the same type but the signs of
991 X, Y and DPROD can differ.
992 - sum is the same size as DPROD or bigger
993 - sum has been recognized as a reduction variable.
995 This is equivalent to:
996 DPROD = X w* Y; #widen mult
997 sum_1 = DPROD w+ sum_0; #widen summation
999 DPROD = X w* Y; #widen mult
1000 sum_1 = DPROD + sum_0; #summation
1003 /* Starting from LAST_STMT, follow the defs of its uses in search
1004 of the above pattern. */
1006 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1007 &oprnd0, &oprnd1))
1008 return NULL;
1010 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1012 vect_unpromoted_value unprom_mult;
1013 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1015 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1016 we know that oprnd1 is the reduction variable (defined by a loop-header
1017 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1018 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1019 if (!oprnd0)
1020 return NULL;
1022 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1023 if (!mult_vinfo)
1024 return NULL;
1026 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1027 inside the loop (in case we are analyzing an outer-loop). */
1028 vect_unpromoted_value unprom0[2];
1029 enum optab_subtype subtype = optab_vector;
1030 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1031 false, 2, unprom0, &half_type, &subtype))
1032 return NULL;
1034 /* If there are two widening operations, make sure they agree on the sign
1035 of the extension. The result of an optab_vector_mixed_sign operation
1036 is signed; otherwise, the result has the same sign as the operands. */
1037 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1038 && (subtype == optab_vector_mixed_sign
1039 ? TYPE_UNSIGNED (unprom_mult.type)
1040 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1041 return NULL;
1043 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1045 tree half_vectype;
1046 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1047 type_out, &half_vectype, subtype))
1048 return NULL;
1050 /* Get the inputs in the appropriate types. */
1051 tree mult_oprnd[2];
1052 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1053 unprom0, half_vectype, subtype);
1055 var = vect_recog_temp_ssa_var (type, NULL);
1056 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1057 mult_oprnd[0], mult_oprnd[1], oprnd1);
1059 return pattern_stmt;
1063 /* Function vect_recog_sad_pattern
1065 Try to find the following Sum of Absolute Difference (SAD) pattern:
1067 type x_t, y_t;
1068 signed TYPE1 diff, abs_diff;
1069 TYPE2 sum = init;
1070 loop:
1071 sum_0 = phi <init, sum_1>
1072 S1 x_t = ...
1073 S2 y_t = ...
1074 S3 x_T = (TYPE1) x_t;
1075 S4 y_T = (TYPE1) y_t;
1076 S5 diff = x_T - y_T;
1077 S6 abs_diff = ABS_EXPR <diff>;
1078 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1079 S8 sum_1 = abs_diff + sum_0;
1081 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1082 same size as 'TYPE1' or bigger. This is a special case of a reduction
1083 computation.
1085 Input:
1087 * STMT_VINFO: The stmt from which the pattern search begins. In the
1088 example, when this function is called with S8, the pattern
1089 {S3,S4,S5,S6,S7,S8} will be detected.
1091 Output:
1093 * TYPE_OUT: The type of the output of this pattern.
1095 * Return value: A new stmt that will be used to replace the sequence of
1096 stmts that constitute the pattern. In this case it will be:
1097 SAD_EXPR <x_t, y_t, sum_0>
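/* As an illustrative source-level example (not part of the original
   sources), a sum-of-absolute-differences loop such as:

     int
     sad (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += __builtin_abs (x[i] - y[i]);
       return sum;
     }

   matches the pattern above with 'type' = unsigned char and
   TYPE1 = TYPE2 = int, and is replaced by a SAD_EXPR reduction when the
   target provides a suitable optab.  */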
1100 static gimple *
1101 vect_recog_sad_pattern (vec_info *vinfo,
1102 stmt_vec_info stmt_vinfo, tree *type_out)
1104 gimple *last_stmt = stmt_vinfo->stmt;
1105 tree half_type;
1107 /* Look for the following pattern
1108 DX = (TYPE1) X;
1109 DY = (TYPE1) Y;
1110 DDIFF = DX - DY;
1111 DAD = ABS_EXPR <DDIFF>;
1112 DAD = (TYPE2) DAD;
1113 sum_1 = DAD + sum_0;
1114 In which
1115 - DX is at least double the size of X
1116 - DY is at least double the size of Y
1117 - DX, DY, DDIFF, DAD all have the same type
1118 - sum is the same size as DAD or bigger
1119 - sum has been recognized as a reduction variable.
1121 This is equivalent to:
1122 DDIFF = X w- Y; #widen sub
1123 DAD = ABS_EXPR <DDIFF>;
1124 sum_1 = DAD w+ sum_0; #widen summation
1126 DDIFF = X w- Y; #widen sub
1127 DAD = ABS_EXPR <DDIFF>;
1128 sum_1 = DAD + sum_0; #summation
1131 /* Starting from LAST_STMT, follow the defs of its uses in search
1132 of the above pattern. */
1134 tree plus_oprnd0, plus_oprnd1;
1135 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1136 &plus_oprnd0, &plus_oprnd1))
1137 return NULL;
1139 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1141 /* Any non-truncating sequence of conversions is OK here, since
1142 with a successful match, the result of the ABS(U) is known to fit
1143 within the nonnegative range of the result type. (It cannot be the
1144 negative of the minimum signed value due to the range of the widening
1145 MINUS_EXPR.) */
1146 vect_unpromoted_value unprom_abs;
1147 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1148 &unprom_abs);
1150 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1151 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1152 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1153 Then check that plus_oprnd0 is defined by an abs_expr. */
1155 if (!plus_oprnd0)
1156 return NULL;
1158 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1159 if (!abs_stmt_vinfo)
1160 return NULL;
1162 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1163 inside the loop (in case we are analyzing an outer-loop). */
1164 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1165 if (!abs_stmt
1166 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1167 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1168 return NULL;
1170 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1171 tree abs_type = TREE_TYPE (abs_oprnd);
1172 if (TYPE_UNSIGNED (abs_type))
1173 return NULL;
1175 /* Peel off conversions from the ABS input. This can involve sign
1176 changes (e.g. from an unsigned subtraction to a signed ABS input)
1177 or signed promotion, but it can't include unsigned promotion.
1178 (Note that ABS of an unsigned promotion should have been folded
1179 away before now anyway.) */
1180 vect_unpromoted_value unprom_diff;
1181 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1182 &unprom_diff);
1183 if (!abs_oprnd)
1184 return NULL;
1185 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1186 && TYPE_UNSIGNED (unprom_diff.type))
1187 return NULL;
1189 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1190 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1191 if (!diff_stmt_vinfo)
1192 return NULL;
1194 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1195 inside the loop (in case we are analyzing an outer-loop). */
1196 vect_unpromoted_value unprom[2];
1197 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1198 false, 2, unprom, &half_type))
1199 return NULL;
1201 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1203 tree half_vectype;
1204 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1205 type_out, &half_vectype))
1206 return NULL;
1208 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1209 tree sad_oprnd[2];
1210 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1211 unprom, half_vectype);
1213 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1214 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1215 sad_oprnd[1], plus_oprnd1);
1217 return pattern_stmt;
1220 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1221 so that it can be treated as though it had the form:
1223 A_TYPE a;
1224 B_TYPE b;
1225 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1226 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1227 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1228 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1229 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1231 Try to replace the pattern with:
1233 A_TYPE a;
1234 B_TYPE b;
1235 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1236 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1237 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1238 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1240 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1242 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1243 name of the pattern being matched, for dump purposes. */
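/* As an illustrative source-level example (not part of the original
   sources), for the widening-multiplication instance of this helper a
   loop such as:

     void
     widen_mult (int *out, short *a, short *b, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = (int) a[i] * (int) b[i];
     }

   has HALF_TYPE = short and RES_TYPE = int, and the multiplication is
   replaced by WIDEN_MULT_EXPR when the target supports it.  */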
1245 static gimple *
1246 vect_recog_widen_op_pattern (vec_info *vinfo,
1247 stmt_vec_info last_stmt_info, tree *type_out,
1248 tree_code orig_code, tree_code wide_code,
1249 bool shift_p, const char *name)
1251 gimple *last_stmt = last_stmt_info->stmt;
1253 vect_unpromoted_value unprom[2];
1254 tree half_type;
1255 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1256 shift_p, 2, unprom, &half_type))
1257 return NULL;
1259 /* Pattern detected. */
1260 vect_pattern_detected (name, last_stmt);
1262 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1263 tree itype = type;
1264 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1265 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1266 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1267 TYPE_UNSIGNED (half_type));
1269 /* Check target support */
1270 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1271 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1272 tree ctype = itype;
1273 tree vecctype = vecitype;
1274 if (orig_code == MINUS_EXPR
1275 && TYPE_UNSIGNED (itype)
1276 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1278 /* Subtraction is special, even if half_type is unsigned and no matter
1279 whether type is signed or unsigned, if type is wider than itype,
1280 we need to sign-extend from the widening operation result to the
1281 result type.
1282 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1283 itype unsigned short and type either int or unsigned int.
1284 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1285 (unsigned short) 0xffff, but for type int we want the result -1
1286 and for type unsigned int 0xffffffff rather than 0xffff. */
1287 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1288 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1291 enum tree_code dummy_code;
1292 int dummy_int;
1293 auto_vec<tree> dummy_vec;
1294 if (!vectype
1295 || !vecitype
1296 || !vecctype
1297 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1298 vecitype, vectype,
1299 &dummy_code, &dummy_code,
1300 &dummy_int, &dummy_vec))
1301 return NULL;
1303 *type_out = get_vectype_for_scalar_type (vinfo, type);
1304 if (!*type_out)
1305 return NULL;
1307 tree oprnd[2];
1308 vect_convert_inputs (vinfo, last_stmt_info,
1309 2, oprnd, half_type, unprom, vectype);
1311 tree var = vect_recog_temp_ssa_var (itype, NULL);
1312 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1313 oprnd[0], oprnd[1]);
1315 if (vecctype != vecitype)
1316 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1317 pattern_stmt, vecitype);
1319 return vect_convert_output (vinfo, last_stmt_info,
1320 type, pattern_stmt, vecctype);
1323 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1324 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1326 static gimple *
1327 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1328 tree *type_out)
1330 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1331 MULT_EXPR, WIDEN_MULT_EXPR, false,
1332 "vect_recog_widen_mult_pattern");
1335 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1336 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1338 static gimple *
1339 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1340 tree *type_out)
1342 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1343 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1344 "vect_recog_widen_plus_pattern");
1347 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1348 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1349 static gimple *
1350 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1351 tree *type_out)
1353 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1354 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1355 "vect_recog_widen_minus_pattern");
1358 /* Function vect_recog_popcount_pattern
1360 Try to find the following pattern:
1362 UTYPE1 A;
1363 TYPE1 B;
1364 UTYPE2 temp_in;
1365 TYPE3 temp_out;
1366 temp_in = (UTYPE2)A;
1368 temp_out = __builtin_popcount{,l,ll} (temp_in);
1369 B = (TYPE1) temp_out;
1371 UTYPE2 may or may not have the same precision as TYPE3,
1372 e.g. UTYPE2 and TYPE3 match for __builtin_popcount but
1373 differ for __builtin_popcountll.
1375 Input:
1377 * STMT_VINFO: The stmt from which the pattern search begins.
1378 here it starts with B = (TYPE1) temp_out;
1380 Output:
1382 * TYPE_OUT: The vector type of the output of this pattern.
1384 * Return value: A new stmt that will be used to replace the sequence of
1385 stmts that constitute the pattern. In this case it will be:
1386 B = .POPCOUNT (A);
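/* As an illustrative source-level example (not part of the original
   sources), a loop such as:

     void
     count_bits (unsigned long long *a, long long *out, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = __builtin_popcountll (a[i]);
     }

   has UTYPE2 = unsigned long long, TYPE3 = int and TYPE1 = long long;
   the call plus the widening of its result are replaced by a single
   64-bit .POPCOUNT when the target provides the vector optab.  */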
1389 static gimple *
1390 vect_recog_popcount_pattern (vec_info *vinfo,
1391 stmt_vec_info stmt_vinfo, tree *type_out)
1393 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1394 gimple *popcount_stmt, *pattern_stmt;
1395 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1396 auto_vec<tree> vargs;
1398 /* Find B = (TYPE1) temp_out. */
1399 if (!last_stmt)
1400 return NULL;
1401 tree_code code = gimple_assign_rhs_code (last_stmt);
1402 if (!CONVERT_EXPR_CODE_P (code))
1403 return NULL;
1405 lhs_oprnd = gimple_assign_lhs (last_stmt);
1406 lhs_type = TREE_TYPE (lhs_oprnd);
1407 if (!INTEGRAL_TYPE_P (lhs_type))
1408 return NULL;
1410 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1411 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1412 || !has_single_use (rhs_oprnd))
1413 return NULL;
1414 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1416 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1417 if (!is_gimple_call (popcount_stmt))
1418 return NULL;
1419 switch (gimple_call_combined_fn (popcount_stmt))
1421 CASE_CFN_POPCOUNT:
1422 break;
1423 default:
1424 return NULL;
1427 if (gimple_call_num_args (popcount_stmt) != 1)
1428 return NULL;
1430 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1431 vect_unpromoted_value unprom_diff;
1432 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1433 &unprom_diff);
1435 if (!rhs_origin)
1436 return NULL;
1438 /* The input and output of .POPCOUNT should be integers of the same
1439 precision. Also A should be unsigned or have the same precision as
1440 temp_in, otherwise there would be a sign_extend from A to temp_in. */
1441 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1442 || (!TYPE_UNSIGNED (unprom_diff.type)
1443 && (TYPE_PRECISION (unprom_diff.type)
1444 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1445 return NULL;
1446 vargs.safe_push (unprom_diff.op);
1448 vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
1449 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1450 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1451 if (!vec_type
1452 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1453 OPTIMIZE_FOR_SPEED))
1454 return NULL;
1456 /* Create B = .POPCOUNT (A). */
1457 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1458 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1459 gimple_call_set_lhs (pattern_stmt, new_var);
1460 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1461 *type_out = vec_type;
1463 if (dump_enabled_p ())
1464 dump_printf_loc (MSG_NOTE, vect_location,
1465 "created pattern stmt: %G", pattern_stmt);
1466 return pattern_stmt;
1469 /* Function vect_recog_pow_pattern
1471 Try to find the following pattern:
1473 x = POW (y, N);
1475 with POW being one of pow, powf, powi, powif and N being
1476 either 2 or 0.5.
1478 Input:
1480 * STMT_VINFO: The stmt from which the pattern search begins.
1482 Output:
1484 * TYPE_OUT: The type of the output of this pattern.
1486 * Return value: A new stmt that will be used to replace the sequence of
1487 stmts that constitute the pattern. In this case it will be:
1488 x = x * x
1490 x = sqrt (x)
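/* As an illustrative source-level example (not part of the original
   sources), a loop such as:

     void
     square_all (double *x, int n)
     {
       for (int i = 0; i < n; i++)
	 x[i] = __builtin_pow (x[i], 2.0);
     }

   is rewritten to compute x[i] * x[i]; with a constant exponent of 0.5
   the call is rewritten to a (vectorizable) square root instead.  */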
1493 static gimple *
1494 vect_recog_pow_pattern (vec_info *vinfo,
1495 stmt_vec_info stmt_vinfo, tree *type_out)
1497 gimple *last_stmt = stmt_vinfo->stmt;
1498 tree base, exp;
1499 gimple *stmt;
1500 tree var;
1502 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1503 return NULL;
1505 switch (gimple_call_combined_fn (last_stmt))
1507 CASE_CFN_POW:
1508 CASE_CFN_POWI:
1509 break;
1511 default:
1512 return NULL;
1515 base = gimple_call_arg (last_stmt, 0);
1516 exp = gimple_call_arg (last_stmt, 1);
1517 if (TREE_CODE (exp) != REAL_CST
1518 && TREE_CODE (exp) != INTEGER_CST)
1520 if (flag_unsafe_math_optimizations
1521 && TREE_CODE (base) == REAL_CST
1522 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1524 combined_fn log_cfn;
1525 built_in_function exp_bfn;
1526 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1528 case BUILT_IN_POW:
1529 log_cfn = CFN_BUILT_IN_LOG;
1530 exp_bfn = BUILT_IN_EXP;
1531 break;
1532 case BUILT_IN_POWF:
1533 log_cfn = CFN_BUILT_IN_LOGF;
1534 exp_bfn = BUILT_IN_EXPF;
1535 break;
1536 case BUILT_IN_POWL:
1537 log_cfn = CFN_BUILT_IN_LOGL;
1538 exp_bfn = BUILT_IN_EXPL;
1539 break;
1540 default:
1541 return NULL;
1543 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1544 tree exp_decl = builtin_decl_implicit (exp_bfn);
1545 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1546 does that, but if C is a power of 2, we want to use
1547 exp2 (log2 (C) * x) in the non-vectorized version, but for
1548 vectorization we don't have vectorized exp2. */
1549 if (logc
1550 && TREE_CODE (logc) == REAL_CST
1551 && exp_decl
1552 && lookup_attribute ("omp declare simd",
1553 DECL_ATTRIBUTES (exp_decl)))
1555 cgraph_node *node = cgraph_node::get_create (exp_decl);
1556 if (node->simd_clones == NULL)
1558 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1559 || node->definition)
1560 return NULL;
1561 expand_simd_clones (node);
1562 if (node->simd_clones == NULL)
1563 return NULL;
1565 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1566 if (!*type_out)
1567 return NULL;
1568 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1569 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1570 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1571 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1572 g = gimple_build_call (exp_decl, 1, def);
1573 gimple_call_set_lhs (g, res);
1574 return g;
1578 return NULL;
1581 /* We now have a pow or powi builtin function call with a constant
1582 exponent. */
1584 /* Catch squaring. */
1585 if ((tree_fits_shwi_p (exp)
1586 && tree_to_shwi (exp) == 2)
1587 || (TREE_CODE (exp) == REAL_CST
1588 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1590 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1591 TREE_TYPE (base), type_out))
1592 return NULL;
1594 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1595 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1596 return stmt;
1599 /* Catch square root. */
1600 if (TREE_CODE (exp) == REAL_CST
1601 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1603 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1604 if (*type_out
1605 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1606 OPTIMIZE_FOR_SPEED))
1608 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1609 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1610 gimple_call_set_lhs (stmt, var);
1611 gimple_call_set_nothrow (stmt, true);
1612 return stmt;
1616 return NULL;
1620 /* Function vect_recog_widen_sum_pattern
1622 Try to find the following pattern:
1624 type x_t;
1625 TYPE x_T, sum = init;
1626 loop:
1627 sum_0 = phi <init, sum_1>
1628 S1 x_t = *p;
1629 S2 x_T = (TYPE) x_t;
1630 S3 sum_1 = x_T + sum_0;
1632 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1633 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1634 a special case of a reduction computation.
1636 Input:
1638 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1639 when this function is called with S3, the pattern {S2,S3} will be detected.
1641 Output:
1643 * TYPE_OUT: The type of the output of this pattern.
1645 * Return value: A new stmt that will be used to replace the sequence of
1646 stmts that constitute the pattern. In this case it will be:
1647 WIDEN_SUM <x_t, sum_0>
1649 Note: The widening-sum idiom is a widening reduction pattern that is
1650 vectorized without preserving all the intermediate results. It
1651 produces only N/2 (widened) results (by summing up pairs of
1652 intermediate results) rather than all N results. Therefore, we
1653 cannot allow this pattern when we want to get all the results and in
1654 the correct order (as is the case when this computation is in an
1655 inner-loop nested in an outer-loop that is being vectorized). */
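/* As an illustrative source-level example (not part of the original
   sources), a reduction such as:

     int
     widen_sum (unsigned char *x, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += x[i];
       return sum;
     }

   has 'type' = unsigned char and 'TYPE' = int and is replaced by a
   WIDEN_SUM_EXPR reduction when the target supports it.  */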
1657 static gimple *
1658 vect_recog_widen_sum_pattern (vec_info *vinfo,
1659 stmt_vec_info stmt_vinfo, tree *type_out)
1661 gimple *last_stmt = stmt_vinfo->stmt;
1662 tree oprnd0, oprnd1;
1663 tree type;
1664 gimple *pattern_stmt;
1665 tree var;
1667 /* Look for the following pattern
1668 DX = (TYPE) X;
1669 sum_1 = DX + sum_0;
1670 In which DX is at least double the size of X, and sum_1 has been
1671 recognized as a reduction variable.
1674 /* Starting from LAST_STMT, follow the defs of its uses in search
1675 of the above pattern. */
1677 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1678 &oprnd0, &oprnd1))
1679 return NULL;
1681 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1683 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1684 we know that oprnd1 is the reduction variable (defined by a loop-header
1685 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1686 Left to check that oprnd0 is defined by a cast from type 'type' to type
1687 'TYPE'. */
1689 vect_unpromoted_value unprom0;
1690 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1691 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1692 return NULL;
1694 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1696 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1697 unprom0.type, type_out))
1698 return NULL;
1700 var = vect_recog_temp_ssa_var (type, NULL);
1701 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1703 return pattern_stmt;
1706 /* Recognize cases in which an operation is performed in one type WTYPE
1707 but could be done more efficiently in a narrower type NTYPE. For example,
1708 if we have:
1710 ATYPE a; // narrower than NTYPE
1711 BTYPE b; // narrower than NTYPE
1712 WTYPE aw = (WTYPE) a;
1713 WTYPE bw = (WTYPE) b;
1714 WTYPE res = aw + bw; // only uses of aw and bw
1716 then it would be more efficient to do:
1718 NTYPE an = (NTYPE) a;
1719 NTYPE bn = (NTYPE) b;
1720 NTYPE resn = an + bn;
1721 WTYPE res = (WTYPE) resn;
1723 Other situations include things like:
1725 ATYPE a; // NTYPE or narrower
1726 WTYPE aw = (WTYPE) a;
1727 WTYPE res = aw + b;
1729 when only "(NTYPE) res" is significant. In that case it's more efficient
1730 to truncate "b" and do the operation on NTYPE instead:
1732 NTYPE an = (NTYPE) a;
1733 NTYPE bn = (NTYPE) b; // truncation
1734 NTYPE resn = an + bn;
1735 WTYPE res = (WTYPE) resn;
1737 All users of "res" should then use "resn" instead, making the final
1738 statement dead (not marked as relevant). The final statement is still
1739 needed to maintain the type correctness of the IR.
1741 vect_determine_precisions has already determined the minimum
1742 precision of the operation and the minimum precision required
1743 by users of the result. */
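/* An illustrative C source fragment (hypothetical example) that ends up in
   this situation because of C integer promotion:

     unsigned char a[N], b[N], c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] + b[i];		// addition written in int by the FE

   Only the low 8 bits of the int result are stored, so the addition can be
   demoted to a narrower type and each vector then holds correspondingly
   more elements than it would for int operations.  */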
1745 static gimple *
1746 vect_recog_over_widening_pattern (vec_info *vinfo,
1747 stmt_vec_info last_stmt_info, tree *type_out)
1749 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1750 if (!last_stmt)
1751 return NULL;
1753 /* See whether we have found that this operation can be done on a
1754 narrower type without changing its semantics. */
1755 unsigned int new_precision = last_stmt_info->operation_precision;
1756 if (!new_precision)
1757 return NULL;
1759 tree lhs = gimple_assign_lhs (last_stmt);
1760 tree type = TREE_TYPE (lhs);
1761 tree_code code = gimple_assign_rhs_code (last_stmt);
1763 /* Punt for reductions where we don't handle the type conversions. */
1764 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1765 return NULL;
1767 /* Keep the first operand of a COND_EXPR as-is: only the other two
1768 operands are interesting. */
1769 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1771 /* Check the operands. */
1772 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1773 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1774 unprom.quick_grow (nops);
1775 unsigned int min_precision = 0;
1776 bool single_use_p = false;
1777 for (unsigned int i = 0; i < nops; ++i)
1779 tree op = gimple_op (last_stmt, first_op + i);
1780 if (TREE_CODE (op) == INTEGER_CST)
1781 unprom[i].set_op (op, vect_constant_def);
1782 else if (TREE_CODE (op) == SSA_NAME)
1784 bool op_single_use_p = true;
1785 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1786 &op_single_use_p))
1787 return NULL;
1788 /* If:
1790 (1) N bits of the result are needed;
1791 (2) all inputs are widened from M<N bits; and
1792 (3) one operand OP is a single-use SSA name
1794 we can shift the M->N widening from OP to the output
1795 without changing the number or type of extensions involved.
1796 This then reduces the number of copies of STMT_INFO.
1798 If instead of (3) more than one operand is a single-use SSA name,
1799 shifting the extension to the output is even more of a win.
1801 If instead:
1803 (1) N bits of the result are needed;
1804 (2) one operand OP2 is widened from M2<N bits;
1805 (3) another operand OP1 is widened from M1<M2 bits; and
1806 (4) both OP1 and OP2 are single-use
1808 the choice is between:
1810 (a) truncating OP2 to M1, doing the operation on M1,
1811 and then widening the result to N
1813 (b) widening OP1 to M2, doing the operation on M2, and then
1814 widening the result to N
1816 Both shift the M2->N widening of the inputs to the output.
1817 (a) additionally shifts the M1->M2 widening to the output;
1818 it requires fewer copies of STMT_INFO but requires an extra
1819 M2->M1 truncation.
1821 Which is better will depend on the complexity and cost of
1822 STMT_INFO, which is hard to predict at this stage. However,
1823 a clear tie-breaker in favor of (b) is the fact that the
1824 truncation in (a) increases the length of the operation chain.
1826 If instead of (4) only one of OP1 or OP2 is single-use,
1827 (b) is still a win over doing the operation in N bits:
1828 it still shifts the M2->N widening on the single-use operand
1829 to the output and reduces the number of STMT_INFO copies.
1831 If neither operand is single-use then operating on fewer than
1832 N bits might lead to more extensions overall. Whether it does
1833 or not depends on global information about the vectorization
1834 region, and whether that's a good trade-off would again
1835 depend on the complexity and cost of the statements involved,
1836 as well as things like register pressure that are not normally
1837 modelled at this stage. We therefore ignore these cases
1838 and just optimize the clear single-use wins above.
1840 Thus we take the maximum precision of the unpromoted operands
1841 and record whether any operand is single-use. */
1842 if (unprom[i].dt == vect_internal_def)
1844 min_precision = MAX (min_precision,
1845 TYPE_PRECISION (unprom[i].type));
1846 single_use_p |= op_single_use_p;
1849 else
1850 return NULL;
1853 /* Although the operation could be done in operation_precision, we have
1854 to balance that against introducing extra truncations or extensions.
1855 Calculate the minimum precision that can be handled efficiently.
1857 The loop above determined that the operation could be handled
1858 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1859 extension from the inputs to the output without introducing more
1860 instructions, and would reduce the number of instructions required
1861 for STMT_INFO itself.
1863 vect_determine_precisions has also determined that the result only
1864 needs min_output_precision bits. Truncating by a factor of N
1865 requires a tree of N - 1 instructions, so if TYPE is N times wider
1866 than min_output_precision, doing the operation in TYPE and truncating
1867 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1868 In contrast:
1870 - truncating the input to a unary operation and doing the operation
1871 in the new type requires at most N - 1 + 1 = N instructions per
1872 output vector
1874 - doing the same for a binary operation requires at most
1875 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1877 Both unary and binary operations require fewer instructions than
1878 this if the operands were extended from a suitable truncated form.
1879 Thus there is usually nothing to lose by doing operations in
1880 min_output_precision bits, but there can be something to gain. */
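/* A worked instance of the counts above (illustration only): with TYPE a
   32-bit int and min_output_precision 8, N is 4, so operating in int and
   truncating costs 4 + 3 = 7 instructions per output vector; a truncated
   unary operation costs at most 3 + 1 = 4 and a truncated binary operation
   at most 2 * 3 + 1 = 7 -- never worse, and strictly better when the
   inputs were themselves extensions of narrower values.  */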
1881 if (!single_use_p)
1882 min_precision = last_stmt_info->min_output_precision;
1883 else
1884 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1886 /* Apply the minimum efficient precision we just calculated. */
1887 if (new_precision < min_precision)
1888 new_precision = min_precision;
1889 new_precision = vect_element_precision (new_precision);
1890 if (new_precision >= TYPE_PRECISION (type))
1891 return NULL;
1893 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1895 *type_out = get_vectype_for_scalar_type (vinfo, type);
1896 if (!*type_out)
1897 return NULL;
1899 /* We've found a viable pattern. Get the new type of the operation. */
1900 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1901 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1903 /* If we're truncating an operation, we need to make sure that we
1904 don't introduce new undefined overflow. The codes tested here are
1905 a subset of those accepted by vect_truncatable_operation_p. */
1906 tree op_type = new_type;
1907 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1908 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1909 op_type = build_nonstandard_integer_type (new_precision, true);
1911 /* We specifically don't check here whether the target supports the
1912 new operation, since it might be something that a later pattern
1913 wants to rewrite anyway. If targets have a minimum element size
1914 for some optabs, we should pattern-match smaller ops to larger ops
1915 where beneficial. */
1916 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1917 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1918 if (!new_vectype || !op_vectype)
1919 return NULL;
1921 if (dump_enabled_p ())
1922 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1923 type, new_type);
1925 /* Calculate the rhs operands for an operation on OP_TYPE. */
1926 tree ops[3] = {};
1927 for (unsigned int i = 1; i < first_op; ++i)
1928 ops[i - 1] = gimple_op (last_stmt, i);
1929 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1930 op_type, &unprom[0], op_vectype);
1932 /* Use the operation to produce a result of type OP_TYPE. */
1933 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1934 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1935 ops[0], ops[1], ops[2]);
1936 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1938 if (dump_enabled_p ())
1939 dump_printf_loc (MSG_NOTE, vect_location,
1940 "created pattern stmt: %G", pattern_stmt);
1942 /* Convert back to the original signedness, if OP_TYPE is different
1943 from NEW_TYPE. */
1944 if (op_type != new_type)
1945 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1946 pattern_stmt, op_vectype);
1948 /* Promote the result to the original type. */
1949 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1950 pattern_stmt, new_vectype);
1952 return pattern_stmt;
1955 /* Recognize the following patterns:
1957 ATYPE a; // narrower than TYPE
1958 BTYPE b; // narrower than TYPE
1960 1) Multiply high with scaling
1961 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1962 Here, c is bitsize (TYPE) / 2 - 1.
1964 2) ... or also with rounding
1965 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1966 Here, d is bitsize (TYPE) / 2 - 2.
1968 3) Normal multiply high
1969 TYPE res = ((TYPE) a * (TYPE) b) >> e;
1970 Here, e is bitsize (TYPE) / 2.
1972 where only the bottom half of res is used. */
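/* A hedged illustration with TYPE = int and 16-bit inputs: pattern 1) is a
   Q15 fixed-point multiplication

     short res = ((int) a * (int) b) >> 15;			// c = 32/2 - 1

   and pattern 2) its rounding variant

     short res = ((((int) a * (int) b) >> 14) + 1) >> 1;	// d = 32/2 - 2

   which become IFN_MULHS and IFN_MULHRS respectively when the target
   provides the matching vector instructions; pattern 3) with a shift of 16
   becomes plain IFN_MULH.  */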
1974 static gimple *
1975 vect_recog_mulhs_pattern (vec_info *vinfo,
1976 stmt_vec_info last_stmt_info, tree *type_out)
1978 /* Check for a right shift. */
1979 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1980 if (!last_stmt
1981 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1982 return NULL;
1984 /* Check that the shift result is wider than the users of the
1985 result need (i.e. that narrowing would be a natural choice). */
1986 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1987 unsigned int target_precision
1988 = vect_element_precision (last_stmt_info->min_output_precision);
1989 if (!INTEGRAL_TYPE_P (lhs_type)
1990 || target_precision >= TYPE_PRECISION (lhs_type))
1991 return NULL;
1993 /* Look through any change in sign on the outer shift input. */
1994 vect_unpromoted_value unprom_rshift_input;
1995 tree rshift_input = vect_look_through_possible_promotion
1996 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1997 if (!rshift_input
1998 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1999 != TYPE_PRECISION (lhs_type))
2000 return NULL;
2002 /* Get the definition of the shift input. */
2003 stmt_vec_info rshift_input_stmt_info
2004 = vect_get_internal_def (vinfo, rshift_input);
2005 if (!rshift_input_stmt_info)
2006 return NULL;
2007 gassign *rshift_input_stmt
2008 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2009 if (!rshift_input_stmt)
2010 return NULL;
2012 stmt_vec_info mulh_stmt_info;
2013 tree scale_term;
2014 bool rounding_p = false;
2016 /* Check for the presence of the rounding term. */
2017 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2019 /* Check that the outer shift was by 1. */
2020 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2021 return NULL;
2023 /* Check that the second operand of the PLUS_EXPR is 1. */
2024 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2025 return NULL;
2027 /* Look through any change in sign on the addition input. */
2028 vect_unpromoted_value unprom_plus_input;
2029 tree plus_input = vect_look_through_possible_promotion
2030 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2031 if (!plus_input
2032 || TYPE_PRECISION (TREE_TYPE (plus_input))
2033 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2034 return NULL;
2036 /* Get the definition of the multiply-high-scale part. */
2037 stmt_vec_info plus_input_stmt_info
2038 = vect_get_internal_def (vinfo, plus_input);
2039 if (!plus_input_stmt_info)
2040 return NULL;
2041 gassign *plus_input_stmt
2042 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2043 if (!plus_input_stmt
2044 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2045 return NULL;
2047 /* Look through any change in sign on the scaling input. */
2048 vect_unpromoted_value unprom_scale_input;
2049 tree scale_input = vect_look_through_possible_promotion
2050 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2051 if (!scale_input
2052 || TYPE_PRECISION (TREE_TYPE (scale_input))
2053 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2054 return NULL;
2056 /* Get the definition of the multiply-high part. */
2057 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2058 if (!mulh_stmt_info)
2059 return NULL;
2061 /* Get the scaling term. */
2062 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2063 rounding_p = true;
2065 else
2067 mulh_stmt_info = rshift_input_stmt_info;
2068 scale_term = gimple_assign_rhs2 (last_stmt);
2071 /* Check that the scaling factor is constant. */
2072 if (TREE_CODE (scale_term) != INTEGER_CST)
2073 return NULL;
2075 /* Check whether the scaling input term can be seen as two widened
2076 inputs multiplied together. */
2077 vect_unpromoted_value unprom_mult[2];
2078 tree new_type;
2079 unsigned int nops
2080 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2081 false, 2, unprom_mult, &new_type);
2082 if (nops != 2)
2083 return NULL;
2085 /* Adjust output precision. */
2086 if (TYPE_PRECISION (new_type) < target_precision)
2087 new_type = build_nonstandard_integer_type
2088 (target_precision, TYPE_UNSIGNED (new_type));
2090 unsigned mult_precision = TYPE_PRECISION (new_type);
2091 internal_fn ifn;
2092 /* Check that the scaling factor is expected. Instead of
2093 target_precision, we should use the one that we actually
2094 use for the internal function. */
2095 if (rounding_p)
2097 /* Check pattern 2). */
2098 if (wi::to_widest (scale_term) + mult_precision + 2
2099 != TYPE_PRECISION (lhs_type))
2100 return NULL;
2102 ifn = IFN_MULHRS;
2104 else
2106 /* Check for pattern 1). */
2107 if (wi::to_widest (scale_term) + mult_precision + 1
2108 == TYPE_PRECISION (lhs_type))
2109 ifn = IFN_MULHS;
2110 /* Check for pattern 3). */
2111 else if (wi::to_widest (scale_term) + mult_precision
2112 == TYPE_PRECISION (lhs_type))
2113 ifn = IFN_MULH;
2114 else
2115 return NULL;
2118 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2120 /* Check for target support. */
2121 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2122 if (!new_vectype
2123 || !direct_internal_fn_supported_p
2124 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2125 return NULL;
2127 /* The IR requires a valid vector type for the cast result, even though
2128 it's likely to be discarded. */
2129 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2130 if (!*type_out)
2131 return NULL;
2133 /* Generate the internal-function call (IFN_MULH, IFN_MULHS or IFN_MULHRS). */
2134 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2135 tree new_ops[2];
2136 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2137 unprom_mult, new_vectype);
2138 gcall *mulhrs_stmt
2139 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2140 gimple_call_set_lhs (mulhrs_stmt, new_var);
2141 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2143 if (dump_enabled_p ())
2144 dump_printf_loc (MSG_NOTE, vect_location,
2145 "created pattern stmt: %G", mulhrs_stmt);
2147 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2148 mulhrs_stmt, new_vectype);
2151 /* Recognize the patterns:
2153 ATYPE a; // narrower than TYPE
2154 BTYPE b; // narrower than TYPE
2155 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2156 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2158 where only the bottom half of avg is used. Try to transform them into:
2160 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2161 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2163 followed by:
2165 TYPE avg = (TYPE) avg';
2167 where NTYPE is no wider than half of TYPE. Since only the bottom half
2168 of avg is used, all or part of the cast of avg' should become redundant.
2170 If there is no target support available, generate code to distribute rshift
2171 over plus and add a carry. */
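/* Illustrative sketch (hypothetical example):

     unsigned char a[N], b[N], avg[N];
     for (int i = 0; i < N; i++)
       avg[i] = (a[i] + b[i] + 1) >> 1;		// matches form (2)

   The additions happen in int only because of C promotion, and just the
   low 8 bits are stored, so the loop can use .AVG_CEIL on vectors of
   unsigned char, or the shift-and-carry fallback emitted below when no
   averaging instruction is available.  */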
2173 static gimple *
2174 vect_recog_average_pattern (vec_info *vinfo,
2175 stmt_vec_info last_stmt_info, tree *type_out)
2177 /* Check for a shift right by one bit. */
2178 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2179 if (!last_stmt
2180 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2181 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2182 return NULL;
2184 /* Check that the shift result is wider than the users of the
2185 result need (i.e. that narrowing would be a natural choice). */
2186 tree lhs = gimple_assign_lhs (last_stmt);
2187 tree type = TREE_TYPE (lhs);
2188 unsigned int target_precision
2189 = vect_element_precision (last_stmt_info->min_output_precision);
2190 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2191 return NULL;
2193 /* Look through any change in sign on the shift input. */
2194 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2195 vect_unpromoted_value unprom_plus;
2196 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2197 &unprom_plus);
2198 if (!rshift_rhs
2199 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2200 return NULL;
2202 /* Get the definition of the shift input. */
2203 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2204 if (!plus_stmt_info)
2205 return NULL;
2207 /* Check whether the shift input can be seen as a tree of additions on
2208 2 or 3 widened inputs.
2210 Note that the pattern should be a win even if the result of one or
2211 more additions is reused elsewhere: if the pattern matches, we'd be
2212 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2213 internal_fn ifn = IFN_AVG_FLOOR;
2214 vect_unpromoted_value unprom[3];
2215 tree new_type;
2216 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2217 WIDEN_PLUS_EXPR, false, 3,
2218 unprom, &new_type);
2219 if (nops == 0)
2220 return NULL;
2221 if (nops == 3)
2223 /* Check that one operand is 1. */
2224 unsigned int i;
2225 for (i = 0; i < 3; ++i)
2226 if (integer_onep (unprom[i].op))
2227 break;
2228 if (i == 3)
2229 return NULL;
2230 /* Throw away the 1 operand and keep the other two. */
2231 if (i < 2)
2232 unprom[i] = unprom[2];
2233 ifn = IFN_AVG_CEIL;
2236 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2238 /* We know that:
2240 (a) the operation can be viewed as:
2242 TYPE widened0 = (TYPE) UNPROM[0];
2243 TYPE widened1 = (TYPE) UNPROM[1];
2244 TYPE tmp1 = widened0 + widened1 {+ 1};
2245 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2247 (b) the first two statements are equivalent to:
2249 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2250 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2252 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2253 where sensible;
2255 (d) all the operations can be performed correctly at twice the width of
2256 NEW_TYPE, due to the nature of the average operation; and
2258 (e) users of the result of the right shift need only TARGET_PRECISION
2259 bits, where TARGET_PRECISION is no more than half of TYPE's
2260 precision.
2262 Under these circumstances, the only situation in which NEW_TYPE
2263 could be narrower than TARGET_PRECISION is if widened0, widened1
2264 and an addition result are all used more than once. Thus we can
2265 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2266 as "free", whereas widening the result of the average instruction
2267 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2268 therefore better not to go narrower than TARGET_PRECISION. */
2269 if (TYPE_PRECISION (new_type) < target_precision)
2270 new_type = build_nonstandard_integer_type (target_precision,
2271 TYPE_UNSIGNED (new_type));
2273 /* Check for target support. */
2274 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2275 if (!new_vectype)
2276 return NULL;
2278 bool fallback_p = false;
2280 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2282 else if (TYPE_UNSIGNED (new_type)
2283 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2284 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2285 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2286 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2287 fallback_p = true;
2288 else
2289 return NULL;
2291 /* The IR requires a valid vector type for the cast result, even though
2292 it's likely to be discarded. */
2293 *type_out = get_vectype_for_scalar_type (vinfo, type);
2294 if (!*type_out)
2295 return NULL;
2297 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2298 tree new_ops[2];
2299 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2300 unprom, new_vectype);
2302 if (fallback_p)
2304 /* As a fallback, generate code for the following sequence:
2306 shifted_op0 = new_ops[0] >> 1;
2307 shifted_op1 = new_ops[1] >> 1;
2308 sum_of_shifted = shifted_op0 + shifted_op1;
2309 unmasked_carry = new_ops[0] and/or new_ops[1];
2310 carry = unmasked_carry & 1;
2311 new_var = sum_of_shifted + carry;
2312 */
2314 tree one_cst = build_one_cst (new_type);
2315 gassign *g;
2317 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2318 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2319 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2321 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2322 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2323 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2325 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2326 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2327 shifted_op0, shifted_op1);
2328 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2330 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2331 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2332 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2333 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2335 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2336 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2337 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2339 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2340 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2343 /* Generate the IFN_AVG* call. */
2344 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2345 new_ops[1]);
2346 gimple_call_set_lhs (average_stmt, new_var);
2347 gimple_set_location (average_stmt, gimple_location (last_stmt));
2349 if (dump_enabled_p ())
2350 dump_printf_loc (MSG_NOTE, vect_location,
2351 "created pattern stmt: %G", average_stmt);
2353 return vect_convert_output (vinfo, last_stmt_info,
2354 type, average_stmt, new_vectype);
2357 /* Recognize cases in which the input to a cast is wider than its
2358 output, and the input is fed by a widening operation. Fold this
2359 by removing the unnecessary intermediate widening. E.g.:
2361 unsigned char a;
2362 unsigned int b = (unsigned int) a;
2363 unsigned short c = (unsigned short) b;
2365 would become:
2367 unsigned short c = (unsigned short) a;
2369 Although this is rare in input IR, it is an expected side-effect
2370 of the over-widening pattern above.
2372 This is beneficial also for integer-to-float conversions, if the
2373 widened integer has more bits than the float, and if the unwidened
2374 input doesn't. */
2376 static gimple *
2377 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2378 stmt_vec_info last_stmt_info, tree *type_out)
2380 /* Check for a cast, including an integer-to-float conversion. */
2381 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2382 if (!last_stmt)
2383 return NULL;
2384 tree_code code = gimple_assign_rhs_code (last_stmt);
2385 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2386 return NULL;
2388 /* Make sure that the rhs is a scalar with a natural bitsize. */
2389 tree lhs = gimple_assign_lhs (last_stmt);
2390 if (!lhs)
2391 return NULL;
2392 tree lhs_type = TREE_TYPE (lhs);
2393 scalar_mode lhs_mode;
2394 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2395 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2396 return NULL;
2398 /* Check for a narrowing operation (from a vector point of view). */
2399 tree rhs = gimple_assign_rhs1 (last_stmt);
2400 tree rhs_type = TREE_TYPE (rhs);
2401 if (!INTEGRAL_TYPE_P (rhs_type)
2402 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2403 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2404 return NULL;
2406 /* Try to find an unpromoted input. */
2407 vect_unpromoted_value unprom;
2408 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2409 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2410 return NULL;
2412 /* If the bits above RHS_TYPE matter, make sure that they're the
2413 same when extending from UNPROM as they are when extending from RHS. */
2414 if (!INTEGRAL_TYPE_P (lhs_type)
2415 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2416 return NULL;
2418 /* We can get the same result by casting UNPROM directly, to avoid
2419 the unnecessary widening and narrowing. */
2420 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2422 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2423 if (!*type_out)
2424 return NULL;
2426 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2427 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2428 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2430 return pattern_stmt;
2433 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2434 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2436 static gimple *
2437 vect_recog_widen_shift_pattern (vec_info *vinfo,
2438 stmt_vec_info last_stmt_info, tree *type_out)
2440 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2441 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2442 "vect_recog_widen_shift_pattern");
2445 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2447 type a_t, b_t, c_t;
2449 S0 a_t = b_t r<< c_t;
2451 Input/Output:
2453 * STMT_VINFO: The stmt from which the pattern search begins,
2454 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2455 with a sequence:
2457 S1 d_t = -c_t;
2458 S2 e_t = d_t & (B - 1);
2459 S3 f_t = b_t << c_t;
2460 S4 g_t = b_t >> e_t;
2461 S0 a_t = f_t | g_t;
2463 where B is element bitsize of type.
2465 Output:
2467 * TYPE_OUT: The type of the output of this pattern.
2469 * Return value: A new stmt that will be used to replace the rotate
2470 S0 stmt. */
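/* Concretely (an illustrative sketch), for a 32-bit unsigned 'type' the
   rotate a_t = b_t r<< c_t expands to

     d_t = -c_t;
     e_t = d_t & 31;		// B - 1 with B == 32
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;

   Negating and masking the count keeps the right-shift amount within
   [0, 31], so the expansion also gives the right answer for c_t == 0.  */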
2472 static gimple *
2473 vect_recog_rotate_pattern (vec_info *vinfo,
2474 stmt_vec_info stmt_vinfo, tree *type_out)
2476 gimple *last_stmt = stmt_vinfo->stmt;
2477 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2478 gimple *pattern_stmt, *def_stmt;
2479 enum tree_code rhs_code;
2480 enum vect_def_type dt;
2481 optab optab1, optab2;
2482 edge ext_def = NULL;
2483 bool bswap16_p = false;
2485 if (is_gimple_assign (last_stmt))
2487 rhs_code = gimple_assign_rhs_code (last_stmt);
2488 switch (rhs_code)
2490 case LROTATE_EXPR:
2491 case RROTATE_EXPR:
2492 break;
2493 default:
2494 return NULL;
2497 lhs = gimple_assign_lhs (last_stmt);
2498 oprnd0 = gimple_assign_rhs1 (last_stmt);
2499 type = TREE_TYPE (oprnd0);
2500 oprnd1 = gimple_assign_rhs2 (last_stmt);
2502 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2504 /* __builtin_bswap16 (x) is another form of x r>> 8.
2505 The vectorizer has bswap support, but only if the argument isn't
2506 promoted. */
2507 lhs = gimple_call_lhs (last_stmt);
2508 oprnd0 = gimple_call_arg (last_stmt, 0);
2509 type = TREE_TYPE (oprnd0);
2510 if (!lhs
2511 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2512 || TYPE_PRECISION (type) <= 16
2513 || TREE_CODE (oprnd0) != SSA_NAME
2514 || BITS_PER_UNIT != 8
2515 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2516 return NULL;
2518 stmt_vec_info def_stmt_info;
2519 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2520 return NULL;
2522 if (dt != vect_internal_def)
2523 return NULL;
2525 if (gimple_assign_cast_p (def_stmt))
2527 def = gimple_assign_rhs1 (def_stmt);
2528 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2529 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2530 oprnd0 = def;
2533 type = TREE_TYPE (lhs);
2534 vectype = get_vectype_for_scalar_type (vinfo, type);
2535 if (vectype == NULL_TREE)
2536 return NULL;
2538 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2540 /* The encoding uses one stepped pattern for each byte in the
2541 16-bit word. */
2542 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2543 for (unsigned i = 0; i < 3; ++i)
2544 for (unsigned j = 0; j < 2; ++j)
2545 elts.quick_push ((i + 1) * 2 - j - 1);
2547 vec_perm_indices indices (elts, 1,
2548 TYPE_VECTOR_SUBPARTS (char_vectype));
2549 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2551 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2552 undo the argument promotion. */
2553 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2555 def = vect_recog_temp_ssa_var (type, NULL);
2556 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2557 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2558 oprnd0 = def;
2561 /* Pattern detected. */
2562 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2564 *type_out = vectype;
2566 /* Pattern supported. Create a stmt to be used to replace the
2567 pattern, with the unpromoted argument. */
2568 var = vect_recog_temp_ssa_var (type, NULL);
2569 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2570 1, oprnd0);
2571 gimple_call_set_lhs (pattern_stmt, var);
2572 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2573 gimple_call_fntype (last_stmt));
2574 return pattern_stmt;
2578 oprnd1 = build_int_cst (integer_type_node, 8);
2579 rhs_code = LROTATE_EXPR;
2580 bswap16_p = true;
2582 else
2583 return NULL;
2585 if (TREE_CODE (oprnd0) != SSA_NAME
2586 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2587 || !INTEGRAL_TYPE_P (type)
2588 || !TYPE_UNSIGNED (type))
2589 return NULL;
2591 stmt_vec_info def_stmt_info;
2592 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2593 return NULL;
2595 if (dt != vect_internal_def
2596 && dt != vect_constant_def
2597 && dt != vect_external_def)
2598 return NULL;
2600 vectype = get_vectype_for_scalar_type (vinfo, type);
2601 if (vectype == NULL_TREE)
2602 return NULL;
2604 /* If vector/vector or vector/scalar rotate is supported by the target,
2605 don't do anything here. */
2606 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2607 if (optab1
2608 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2610 use_rotate:
2611 if (bswap16_p)
2613 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2615 def = vect_recog_temp_ssa_var (type, NULL);
2616 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2617 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2618 oprnd0 = def;
2621 /* Pattern detected. */
2622 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2624 *type_out = vectype;
2626 /* Pattern supported. Create a stmt to be used to replace the
2627 pattern. */
2628 var = vect_recog_temp_ssa_var (type, NULL);
2629 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2630 oprnd1);
2631 return pattern_stmt;
2633 return NULL;
2636 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2638 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2639 if (optab2
2640 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2641 goto use_rotate;
2644 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2645 don't do anything here either. */
2646 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2647 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2648 if (!optab1
2649 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2650 || !optab2
2651 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2653 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2654 return NULL;
2655 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2656 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2657 if (!optab1
2658 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2659 || !optab2
2660 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2661 return NULL;
2664 *type_out = vectype;
2666 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2668 def = vect_recog_temp_ssa_var (type, NULL);
2669 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2670 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2671 oprnd0 = def;
2674 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2675 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2677 def = NULL_TREE;
2678 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2679 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2680 def = oprnd1;
2681 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2683 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2684 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2685 && TYPE_PRECISION (TREE_TYPE (rhs1))
2686 == TYPE_PRECISION (type))
2687 def = rhs1;
2690 if (def == NULL_TREE)
2692 def = vect_recog_temp_ssa_var (type, NULL);
2693 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2694 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2696 stype = TREE_TYPE (def);
2698 if (TREE_CODE (def) == INTEGER_CST)
2700 if (!tree_fits_uhwi_p (def)
2701 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2702 || integer_zerop (def))
2703 return NULL;
2704 def2 = build_int_cst (stype,
2705 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2707 else
2709 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2711 if (vecstype == NULL_TREE)
2712 return NULL;
2713 def2 = vect_recog_temp_ssa_var (stype, NULL);
2714 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2715 if (ext_def)
2717 basic_block new_bb
2718 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2719 gcc_assert (!new_bb);
2721 else
2722 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2724 def2 = vect_recog_temp_ssa_var (stype, NULL);
2725 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2726 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2727 gimple_assign_lhs (def_stmt), mask);
2728 if (ext_def)
2730 basic_block new_bb
2731 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2732 gcc_assert (!new_bb);
2734 else
2735 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2738 var1 = vect_recog_temp_ssa_var (type, NULL);
2739 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2740 ? LSHIFT_EXPR : RSHIFT_EXPR,
2741 oprnd0, def);
2742 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2744 var2 = vect_recog_temp_ssa_var (type, NULL);
2745 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2746 ? RSHIFT_EXPR : LSHIFT_EXPR,
2747 oprnd0, def2);
2748 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2750 /* Pattern detected. */
2751 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2753 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2754 var = vect_recog_temp_ssa_var (type, NULL);
2755 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2757 return pattern_stmt;
2760 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2761 vectorized:
2763 type a_t;
2764 TYPE b_T, res_T;
2766 S1 a_t = ;
2767 S2 b_T = ;
2768 S3 res_T = b_T op a_t;
2770 where type 'TYPE' is a type with a different size than 'type',
2771 and op is <<, >> or rotate.
2773 Also detect cases:
2775 type a_t;
2776 TYPE b_T, c_T, res_T;
2778 S0 c_T = ;
2779 S1 a_t = (type) c_T;
2780 S2 b_T = ;
2781 S3 res_T = b_T op a_t;
2783 Input/Output:
2785 * STMT_VINFO: The stmt from which the pattern search begins,
2786 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2787 with a shift/rotate which has the same type on both operands, in the
2788 second case just b_T op c_T, in the first case with an added cast
2789 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2791 Output:
2793 * TYPE_OUT: The type of the output of this pattern.
2795 * Return value: A new stmt that will be used to replace the shift/rotate
2796 S3 stmt. */
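/* Concretely (an illustrative sketch), with TYPE = unsigned short and
   type = unsigned char:

     S0 c_T = ...;			// unsigned short
     S1 a_t = (unsigned char) c_T;
     S2 b_T = ...;			// unsigned short
     S3 res_T = b_T << a_t;

   Vector shifts want both operands in the same vector type, so S3 is
   rewritten to shift b_T by an unsigned short count, either derived from
   c_T or produced by casting a_t back up in STMT_VINFO_PATTERN_DEF_SEQ.  */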
2798 static gimple *
2799 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2800 stmt_vec_info stmt_vinfo,
2801 tree *type_out)
2803 gimple *last_stmt = stmt_vinfo->stmt;
2804 tree oprnd0, oprnd1, lhs, var;
2805 gimple *pattern_stmt;
2806 enum tree_code rhs_code;
2808 if (!is_gimple_assign (last_stmt))
2809 return NULL;
2811 rhs_code = gimple_assign_rhs_code (last_stmt);
2812 switch (rhs_code)
2814 case LSHIFT_EXPR:
2815 case RSHIFT_EXPR:
2816 case LROTATE_EXPR:
2817 case RROTATE_EXPR:
2818 break;
2819 default:
2820 return NULL;
2823 lhs = gimple_assign_lhs (last_stmt);
2824 oprnd0 = gimple_assign_rhs1 (last_stmt);
2825 oprnd1 = gimple_assign_rhs2 (last_stmt);
2826 if (TREE_CODE (oprnd0) != SSA_NAME
2827 || TREE_CODE (oprnd1) != SSA_NAME
2828 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2829 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2830 || TYPE_PRECISION (TREE_TYPE (lhs))
2831 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2832 return NULL;
2834 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2835 if (!def_vinfo)
2836 return NULL;
2838 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2839 if (*type_out == NULL_TREE)
2840 return NULL;
2842 tree def = NULL_TREE;
2843 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2844 if (def_stmt && gimple_assign_cast_p (def_stmt))
2846 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2847 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2848 && TYPE_PRECISION (TREE_TYPE (rhs1))
2849 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2851 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2852 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2853 def = rhs1;
2854 else
2856 tree mask
2857 = build_low_bits_mask (TREE_TYPE (rhs1),
2858 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2859 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2860 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2861 tree vecstype = get_vectype_for_scalar_type (vinfo,
2862 TREE_TYPE (rhs1));
2863 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2868 if (def == NULL_TREE)
2870 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2871 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2872 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2875 /* Pattern detected. */
2876 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2878 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2879 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2880 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2882 return pattern_stmt;
2885 /* Return true iff the target has a vector optab implementing the operation
2886 CODE on type VECTYPE. */
2888 static bool
2889 target_has_vecop_for_code (tree_code code, tree vectype)
2891 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2892 return voptab
2893 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2896 /* Verify that the target has optabs of VECTYPE to perform all the steps
2897 needed by the multiplication-by-immediate synthesis algorithm described by
2898 ALG and VAR. If SYNTH_SHIFT_P is true, ensure that vector addition is
2899 present. Return true iff the target supports all the steps. */
2901 static bool
2902 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2903 tree vectype, bool synth_shift_p)
2905 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2906 return false;
2908 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2909 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2911 if (var == negate_variant
2912 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2913 return false;
2915 /* If we must synthesize shifts with additions, make sure that vector
2916 addition is available. */
2917 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2918 return false;
2920 for (int i = 1; i < alg->ops; i++)
2922 switch (alg->op[i])
2924 case alg_shift:
2925 break;
2926 case alg_add_t_m2:
2927 case alg_add_t2_m:
2928 case alg_add_factor:
2929 if (!supports_vplus)
2930 return false;
2931 break;
2932 case alg_sub_t_m2:
2933 case alg_sub_t2_m:
2934 case alg_sub_factor:
2935 if (!supports_vminus)
2936 return false;
2937 break;
2938 case alg_unknown:
2939 case alg_m:
2940 case alg_zero:
2941 case alg_impossible:
2942 return false;
2943 default:
2944 gcc_unreachable ();
2948 return true;
2951 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2952 putting the final result in DEST. Append all statements but the last into
2953 VINFO. Return the last statement. */
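/* For instance (a sketch of the behaviour described above), a shift by
   AMNT == 3 becomes three self-additions:

     t1 = op + op;		// op << 1
     t2 = t1 + t1;		// op << 2
     dest = t2 + t2;		// op << 3

   with t1 and t2 appended to the pattern sequence and the final addition
   returned to the caller.  */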
2955 static gimple *
2956 synth_lshift_by_additions (vec_info *vinfo,
2957 tree dest, tree op, HOST_WIDE_INT amnt,
2958 stmt_vec_info stmt_info)
2960 HOST_WIDE_INT i;
2961 tree itype = TREE_TYPE (op);
2962 tree prev_res = op;
2963 gcc_assert (amnt >= 0);
2964 for (i = 0; i < amnt; i++)
2966 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2967 : dest;
2968 gimple *stmt
2969 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2970 prev_res = tmp_var;
2971 if (i < amnt - 1)
2972 append_pattern_def_seq (vinfo, stmt_info, stmt);
2973 else
2974 return stmt;
2976 gcc_unreachable ();
2977 return NULL;
2980 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2981 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2982 the process if necessary. Append the resulting assignment statements
2983 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2984 result of the binary operation. If SYNTH_SHIFT_P is true, synthesize
2985 left shifts using additions. */
2987 static tree
2988 apply_binop_and_append_stmt (vec_info *vinfo,
2989 tree_code code, tree op1, tree op2,
2990 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2992 if (integer_zerop (op2)
2993 && (code == LSHIFT_EXPR
2994 || code == PLUS_EXPR))
2996 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2997 return op1;
3000 gimple *stmt;
3001 tree itype = TREE_TYPE (op1);
3002 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3004 if (code == LSHIFT_EXPR
3005 && synth_shift_p)
3007 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3008 TREE_INT_CST_LOW (op2), stmt_vinfo);
3009 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3010 return tmp_var;
3013 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3014 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3015 return tmp_var;
3018 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3019 and simple arithmetic operations to be vectorized. Record the statements
3020 produced in STMT_VINFO and return the last statement in the sequence or
3021 NULL if it's not possible to synthesize such a multiplication.
3022 This function mirrors the behavior of expand_mult_const in expmed.c but
3023 works on tree-ssa form. */
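/* As a hedged illustration, one decomposition choose_mult_variant can
   return for multiplication by 10 leads to the sequence

     t1 = op << 2;		// 4 * op	(shift for alg_add_t2_m)
     t2 = t1 + op;		// 5 * op	(alg_add_t2_m)
     t3 = t2 << 1;		// 10 * op	(alg_shift)

   with the work done in an unsigned copy of the type when the original
   type has undefined overflow, followed by a cast back.  */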
3025 static gimple *
3026 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3027 stmt_vec_info stmt_vinfo)
3029 tree itype = TREE_TYPE (op);
3030 machine_mode mode = TYPE_MODE (itype);
3031 struct algorithm alg;
3032 mult_variant variant;
3033 if (!tree_fits_shwi_p (val))
3034 return NULL;
3036 /* Multiplication synthesis by shifts, adds and subs can introduce
3037 signed overflow where the original operation didn't. Perform the
3038 operations on an unsigned type and cast back to avoid this.
3039 In the future we may want to relax this for synthesis algorithms
3040 that we can prove do not cause unexpected overflow. */
3041 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3043 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3045 /* Targets that don't support vector shifts but support vector additions
3046 can synthesize shifts that way. */
3047 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3049 HOST_WIDE_INT hwval = tree_to_shwi (val);
3050 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3051 The vectorizer's benefit analysis will decide whether it's beneficial
3052 to do this. */
3053 bool possible = choose_mult_variant (mode, hwval, &alg,
3054 &variant, MAX_COST);
3055 if (!possible)
3056 return NULL;
3058 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3060 if (!vectype
3061 || !target_supports_mult_synth_alg (&alg, variant,
3062 vectype, synth_shift_p))
3063 return NULL;
3065 tree accumulator;
3067 /* Clear out the sequence of statements so we can populate it below. */
3068 gimple *stmt = NULL;
3070 if (cast_to_unsigned_p)
3072 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3073 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3074 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3075 op = tmp_op;
3078 if (alg.op[0] == alg_zero)
3079 accumulator = build_int_cst (multtype, 0);
3080 else
3081 accumulator = op;
3083 bool needs_fixup = (variant == negate_variant)
3084 || (variant == add_variant);
3086 for (int i = 1; i < alg.ops; i++)
3088 tree shft_log = build_int_cst (multtype, alg.log[i]);
3089 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3090 tree tmp_var = NULL_TREE;
3092 switch (alg.op[i])
3094 case alg_shift:
3095 if (synth_shift_p)
3096 stmt
3097 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3098 alg.log[i], stmt_vinfo);
3099 else
3100 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3101 shft_log);
3102 break;
3103 case alg_add_t_m2:
3104 tmp_var
3105 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3106 stmt_vinfo, synth_shift_p);
3107 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3108 tmp_var);
3109 break;
3110 case alg_sub_t_m2:
3111 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3112 shft_log, stmt_vinfo,
3113 synth_shift_p);
3114 /* In some algorithms the first step involves zeroing the
3115 accumulator. If subtracting from such an accumulator,
3116 just emit the negation directly. */
3117 if (integer_zerop (accumulator))
3118 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3119 else
3120 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3121 tmp_var);
3122 break;
3123 case alg_add_t2_m:
3124 tmp_var
3125 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3126 shft_log, stmt_vinfo, synth_shift_p);
3127 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3128 break;
3129 case alg_sub_t2_m:
3130 tmp_var
3131 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3132 shft_log, stmt_vinfo, synth_shift_p);
3133 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3134 break;
3135 case alg_add_factor:
3136 tmp_var
3137 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3138 shft_log, stmt_vinfo, synth_shift_p);
3139 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3140 tmp_var);
3141 break;
3142 case alg_sub_factor:
3143 tmp_var
3144 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3145 shft_log, stmt_vinfo, synth_shift_p);
3146 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3147 accumulator);
3148 break;
3149 default:
3150 gcc_unreachable ();
3152 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3153 but rather return it directly. */
3155 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3156 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3157 accumulator = accum_tmp;
3159 if (variant == negate_variant)
3161 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3162 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3163 accumulator = accum_tmp;
3164 if (cast_to_unsigned_p)
3165 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3167 else if (variant == add_variant)
3169 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3170 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3171 accumulator = accum_tmp;
3172 if (cast_to_unsigned_p)
3173 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3175 /* Move back to a signed type if needed. */
3176 if (cast_to_unsigned_p)
3178 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3179 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3182 return stmt;
3185 /* Detect multiplication by a constant and convert it into a sequence of
3186 shifts, additions, subtractions and negations. We reuse the
3187 choose_mult_variant algorithms from expmed.c.
3189 Input/Output:
3191 STMT_VINFO: The stmt from which the pattern search begins,
3192 i.e. the mult stmt.
3194 Output:
3196 * TYPE_OUT: The type of the output of this pattern.
3198 * Return value: A new stmt that will be used to replace
3199 the multiplication. */
3201 static gimple *
3202 vect_recog_mult_pattern (vec_info *vinfo,
3203 stmt_vec_info stmt_vinfo, tree *type_out)
3205 gimple *last_stmt = stmt_vinfo->stmt;
3206 tree oprnd0, oprnd1, vectype, itype;
3207 gimple *pattern_stmt;
3209 if (!is_gimple_assign (last_stmt))
3210 return NULL;
3212 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3213 return NULL;
3215 oprnd0 = gimple_assign_rhs1 (last_stmt);
3216 oprnd1 = gimple_assign_rhs2 (last_stmt);
3217 itype = TREE_TYPE (oprnd0);
3219 if (TREE_CODE (oprnd0) != SSA_NAME
3220 || TREE_CODE (oprnd1) != INTEGER_CST
3221 || !INTEGRAL_TYPE_P (itype)
3222 || !type_has_mode_precision_p (itype))
3223 return NULL;
3225 vectype = get_vectype_for_scalar_type (vinfo, itype);
3226 if (vectype == NULL_TREE)
3227 return NULL;
3229 /* If the target can handle vectorized multiplication natively,
3230 don't attempt to optimize this. */
3231 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3232 if (mul_optab != unknown_optab)
3234 machine_mode vec_mode = TYPE_MODE (vectype);
3235 int icode = (int) optab_handler (mul_optab, vec_mode);
3236 if (icode != CODE_FOR_nothing)
3237 return NULL;
3240 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3241 oprnd0, oprnd1, stmt_vinfo);
3242 if (!pattern_stmt)
3243 return NULL;
3245 /* Pattern detected. */
3246 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3248 *type_out = vectype;
3250 return pattern_stmt;
3253 /* Detect a signed division by a constant that wouldn't otherwise be
3254 vectorized:
3256 type a_t, b_t;
3258 S1 a_t = b_t / N;
3260 where type 'type' is an integral type and N is a constant.
3262 Similarly handle modulo by a constant:
3264 S4 a_t = b_t % N;
3266 Input/Output:
3268 * STMT_VINFO: The stmt from which the pattern search begins,
3269 i.e. the division stmt. S1 is replaced, if N is a power-of-two
3270 constant and the type is signed, by:
3271 S3 y_t = b_t < 0 ? N - 1 : 0;
3272 S2 x_t = b_t + y_t;
3273 S1' a_t = x_t >> log2 (N);
3275 S4 is replaced, if N is a power-of-two constant and the type is
3276 signed, by the following (where *_T temporaries have unsigned type):
3277 S9 y_T = b_t < 0 ? -1U : 0U;
3278 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3279 S7 z_t = (type) z_T;
3280 S6 w_t = b_t + z_t;
3281 S5 x_t = w_t & (N - 1);
3282 S4' a_t = x_t - z_t;
3284 Output:
3286 * TYPE_OUT: The type of the output of this pattern.
3288 * Return value: A new stmt that will be used to replace the division
3289 S1 or modulo S4 stmt. */
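/* A worked example of the power-of-two case (illustration only): for
   int a_t = b_t / 4, an arithmetic shift alone rounds towards negative
   infinity (-7 >> 2 == -2) whereas C division truncates towards zero
   (-7 / 4 == -1).  The bias added in S3/S2 corrects this:

     y_t = b_t < 0 ? 3 : 0;		// N - 1
     x_t = b_t + y_t;			// -7 + 3 == -4
     a_t = x_t >> 2;			// -4 >> 2 == -1

   Non-negative dividends are unchanged because y_t is then zero.  */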
3291 static gimple *
3292 vect_recog_divmod_pattern (vec_info *vinfo,
3293 stmt_vec_info stmt_vinfo, tree *type_out)
3295 gimple *last_stmt = stmt_vinfo->stmt;
3296 tree oprnd0, oprnd1, vectype, itype, cond;
3297 gimple *pattern_stmt, *def_stmt;
3298 enum tree_code rhs_code;
3299 optab optab;
3300 tree q;
3301 int dummy_int, prec;
3303 if (!is_gimple_assign (last_stmt))
3304 return NULL;
3306 rhs_code = gimple_assign_rhs_code (last_stmt);
3307 switch (rhs_code)
3309 case TRUNC_DIV_EXPR:
3310 case EXACT_DIV_EXPR:
3311 case TRUNC_MOD_EXPR:
3312 break;
3313 default:
3314 return NULL;
3317 oprnd0 = gimple_assign_rhs1 (last_stmt);
3318 oprnd1 = gimple_assign_rhs2 (last_stmt);
3319 itype = TREE_TYPE (oprnd0);
3320 if (TREE_CODE (oprnd0) != SSA_NAME
3321 || TREE_CODE (oprnd1) != INTEGER_CST
3322 || TREE_CODE (itype) != INTEGER_TYPE
3323 || !type_has_mode_precision_p (itype))
3324 return NULL;
3326 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3327 vectype = get_vectype_for_scalar_type (vinfo, itype);
3328 if (vectype == NULL_TREE)
3329 return NULL;
3331 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3333 /* If the target can handle vectorized division or modulo natively,
3334 don't attempt to optimize this, since native division is likely
3335 to give smaller code. */
3336 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3337 if (optab != unknown_optab)
3339 machine_mode vec_mode = TYPE_MODE (vectype);
3340 int icode = (int) optab_handler (optab, vec_mode);
3341 if (icode != CODE_FOR_nothing)
3342 return NULL;
3346 prec = TYPE_PRECISION (itype);
3347 if (integer_pow2p (oprnd1))
3349 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3350 return NULL;
3352 /* Pattern detected. */
3353 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3355 *type_out = vectype;
3357 /* Check if the target supports this internal function. */
3358 internal_fn ifn = IFN_DIV_POW2;
3359 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3361 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3363 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3364 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3365 gimple_call_set_lhs (div_stmt, var_div);
3367 if (rhs_code == TRUNC_MOD_EXPR)
3369 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3370 def_stmt
3371 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3372 LSHIFT_EXPR, var_div, shift);
3373 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3374 pattern_stmt
3375 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3376 MINUS_EXPR, oprnd0,
3377 gimple_assign_lhs (def_stmt));
3379 else
3380 pattern_stmt = div_stmt;
3381 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3383 return pattern_stmt;
3386 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3387 build_int_cst (itype, 0));
3388 if (rhs_code == TRUNC_DIV_EXPR
3389 || rhs_code == EXACT_DIV_EXPR)
3391 tree var = vect_recog_temp_ssa_var (itype, NULL);
3392 tree shift;
3393 def_stmt
3394 = gimple_build_assign (var, COND_EXPR, cond,
3395 fold_build2 (MINUS_EXPR, itype, oprnd1,
3396 build_int_cst (itype, 1)),
3397 build_int_cst (itype, 0));
3398 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3399 var = vect_recog_temp_ssa_var (itype, NULL);
3400 def_stmt
3401 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3402 gimple_assign_lhs (def_stmt));
3403 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3405 shift = build_int_cst (itype, tree_log2 (oprnd1));
3406 pattern_stmt
3407 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3408 RSHIFT_EXPR, var, shift);
3410 else
3412 tree signmask;
3413 if (compare_tree_int (oprnd1, 2) == 0)
3415 signmask = vect_recog_temp_ssa_var (itype, NULL);
3416 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3417 build_int_cst (itype, 1),
3418 build_int_cst (itype, 0));
3419 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3421 else
3423 tree utype
3424 = build_nonstandard_integer_type (prec, 1);
3425 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3426 tree shift
3427 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3428 - tree_log2 (oprnd1));
3429 tree var = vect_recog_temp_ssa_var (utype, NULL);
3431 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3432 build_int_cst (utype, -1),
3433 build_int_cst (utype, 0));
3434 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3435 var = vect_recog_temp_ssa_var (utype, NULL);
3436 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3437 gimple_assign_lhs (def_stmt),
3438 shift);
3439 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3440 signmask = vect_recog_temp_ssa_var (itype, NULL);
3441 def_stmt
3442 = gimple_build_assign (signmask, NOP_EXPR, var);
3443 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3445 def_stmt
3446 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3447 PLUS_EXPR, oprnd0, signmask);
3448 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3449 def_stmt
3450 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3451 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3452 fold_build2 (MINUS_EXPR, itype, oprnd1,
3453 build_int_cst (itype, 1)));
3454 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3456 pattern_stmt
3457 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3458 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3459 signmask);
3462 return pattern_stmt;
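/* Illustrative scalar sketch (added for exposition, not part of the original
   source) of the power-of-two sequences built above, for signed N and
   D == 1 << K:

     TRUNC_DIV:  tmp = N < 0 ? N + (D - 1) : N;
                 q   = tmp >> K;                      (arithmetic shift)
     TRUNC_MOD:  signmask = N < 0 ? D - 1 : 0;
                 r = ((N + signmask) & (D - 1)) - signmask;

   The IFN_DIV_POW2 path earlier instead emits a single internal call,
   roughly q = .DIV_POW2 (N, K), when the target supports it directly.  */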
3465 if (prec > HOST_BITS_PER_WIDE_INT
3466 || integer_zerop (oprnd1))
3467 return NULL;
3469 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3470 return NULL;
3472 if (TYPE_UNSIGNED (itype))
3474 unsigned HOST_WIDE_INT mh, ml;
3475 int pre_shift, post_shift;
3476 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3477 & GET_MODE_MASK (itype_mode));
3478 tree t1, t2, t3, t4;
3480 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3481 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3482 return NULL;
3484 /* Find a suitable multiplier and right shift count
3485 instead of multiplying with D. */
3486 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3488 /* If the suggested multiplier is more than PREC bits, we can do better
3489 for even divisors, using an initial right shift. */
3490 if (mh != 0 && (d & 1) == 0)
3492 pre_shift = ctz_or_zero (d);
3493 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3494 &ml, &post_shift, &dummy_int);
3495 gcc_assert (!mh);
3497 else
3498 pre_shift = 0;
3500 if (mh != 0)
3502 if (post_shift - 1 >= prec)
3503 return NULL;
3505 /* t1 = oprnd0 h* ml;
3506 t2 = oprnd0 - t1;
3507 t3 = t2 >> 1;
3508 t4 = t1 + t3;
3509 q = t4 >> (post_shift - 1); */
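/* Illustrative worked example (added, not from the original source): for
   prec == 32 and D == 7, choose_multiplier yields ML == 0x24924925 with
   POST_SHIFT == 3, so the sequence above computes

     t1 = mulhi (n, 0x24924925);
     t4 = t1 + ((n - t1) >> 1);
     q  = t4 >> 2;

   e.g. n == 20 gives t1 == 2, t4 == 2 + (18 >> 1) == 11, q == 11 >> 2 == 2,
   which matches 20 / 7.  */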
3510 t1 = vect_recog_temp_ssa_var (itype, NULL);
3511 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3512 build_int_cst (itype, ml));
3513 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3515 t2 = vect_recog_temp_ssa_var (itype, NULL);
3516 def_stmt
3517 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3518 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3520 t3 = vect_recog_temp_ssa_var (itype, NULL);
3521 def_stmt
3522 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3523 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3525 t4 = vect_recog_temp_ssa_var (itype, NULL);
3526 def_stmt
3527 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3529 if (post_shift != 1)
3531 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3533 q = vect_recog_temp_ssa_var (itype, NULL);
3534 pattern_stmt
3535 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3536 build_int_cst (itype, post_shift - 1));
3538 else
3540 q = t4;
3541 pattern_stmt = def_stmt;
3544 else
3546 if (pre_shift >= prec || post_shift >= prec)
3547 return NULL;
3549 /* t1 = oprnd0 >> pre_shift;
3550 t2 = t1 h* ml;
3551 q = t2 >> post_shift; */
3552 if (pre_shift)
3554 t1 = vect_recog_temp_ssa_var (itype, NULL);
3555 def_stmt
3556 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3557 build_int_cst (NULL, pre_shift));
3558 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3560 else
3561 t1 = oprnd0;
3563 t2 = vect_recog_temp_ssa_var (itype, NULL);
3564 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3565 build_int_cst (itype, ml));
3567 if (post_shift)
3569 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3571 q = vect_recog_temp_ssa_var (itype, NULL);
3572 def_stmt
3573 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3574 build_int_cst (itype, post_shift));
3576 else
3577 q = t2;
3579 pattern_stmt = def_stmt;
3582 else
3584 unsigned HOST_WIDE_INT ml;
3585 int post_shift;
3586 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3587 unsigned HOST_WIDE_INT abs_d;
3588 bool add = false;
3589 tree t1, t2, t3, t4;
3591 /* Give up for -1. */
3592 if (d == -1)
3593 return NULL;
3595 /* Since d might be INT_MIN, we have to cast to
3596 unsigned HOST_WIDE_INT before negating to avoid
3597 undefined signed overflow. */
3598 abs_d = (d >= 0
3599 ? (unsigned HOST_WIDE_INT) d
3600 : - (unsigned HOST_WIDE_INT) d);
3602 /* n rem d = n rem -d */
3603 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3605 d = abs_d;
3606 oprnd1 = build_int_cst (itype, abs_d);
3608 if (HOST_BITS_PER_WIDE_INT >= prec
3609 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3610 /* This case is not handled correctly below. */
3611 return NULL;
3613 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3614 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3616 add = true;
3617 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3619 if (post_shift >= prec)
3620 return NULL;
3622 /* t1 = oprnd0 h* ml; */
3623 t1 = vect_recog_temp_ssa_var (itype, NULL);
3624 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3625 build_int_cst (itype, ml));
3627 if (add)
3629 /* t2 = t1 + oprnd0; */
3630 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3631 t2 = vect_recog_temp_ssa_var (itype, NULL);
3632 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3634 else
3635 t2 = t1;
3637 if (post_shift)
3639 /* t3 = t2 >> post_shift; */
3640 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3641 t3 = vect_recog_temp_ssa_var (itype, NULL);
3642 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3643 build_int_cst (itype, post_shift));
3645 else
3646 t3 = t2;
3648 int msb = 1;
3649 value_range r;
3650 get_range_query (cfun)->range_of_expr (r, oprnd0);
3651 if (r.kind () == VR_RANGE)
3653 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3654 msb = 0;
3655 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3656 msb = -1;
3659 if (msb == 0 && d >= 0)
3661 /* q = t3; */
3662 q = t3;
3663 pattern_stmt = def_stmt;
3665 else
3667 /* t4 = oprnd0 >> (prec - 1);
3668 or if we know from VRP that oprnd0 >= 0
3669 t4 = 0;
3670 or if we know from VRP that oprnd0 < 0
3671 t4 = -1; */
3672 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3673 t4 = vect_recog_temp_ssa_var (itype, NULL);
3674 if (msb != 1)
3675 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3676 build_int_cst (itype, msb));
3677 else
3678 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3679 build_int_cst (itype, prec - 1));
3680 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3682 /* q = t3 - t4; or q = t4 - t3; */
3683 q = vect_recog_temp_ssa_var (itype, NULL);
3684 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3685 d < 0 ? t3 : t4);
3689 if (rhs_code == TRUNC_MOD_EXPR)
3691 tree r, t1;
3693 /* We divided. Now finish by:
3694 t1 = q * oprnd1;
3695 r = oprnd0 - t1; */
3696 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3698 t1 = vect_recog_temp_ssa_var (itype, NULL);
3699 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3700 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3702 r = vect_recog_temp_ssa_var (itype, NULL);
3703 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3706 /* Pattern detected. */
3707 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3709 *type_out = vectype;
3710 return pattern_stmt;
3713 /* Function vect_recog_mixed_size_cond_pattern
3715 Try to find the following pattern:
3717 type x_t, y_t;
3718 TYPE a_T, b_T, c_T;
3719 loop:
3720 S1 a_T = x_t CMP y_t ? b_T : c_T;
3722 where type 'TYPE' is an integral type which has a different size
3723 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3724 than 'type', the constants need to fit into an integer type
3725 with the same width as 'type') or results of conversion from 'type'.
3727 Input:
3729 * STMT_VINFO: The stmt from which the pattern search begins.
3731 Output:
3733 * TYPE_OUT: The type of the output of this pattern.
3735 * Return value: A new stmt that will be used to replace the pattern.
3736 Additionally a def_stmt is added.
3738 a_it = x_t CMP y_t ? b_it : c_it;
3739 a_T = (TYPE) a_it; */
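/* Illustrative example (added for exposition, not from the original source):
   with 16-bit 'type' and 64-bit 'TYPE',

     short x, y;  long a;
     a = x < y ? 17 : 42;

   would be rewritten as

     short tmp = x < y ? 17 : 42;
     a = (long) tmp;

   so the COND_EXPR itself can be vectorized on the narrower element size.  */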
3741 static gimple *
3742 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3743 stmt_vec_info stmt_vinfo, tree *type_out)
3745 gimple *last_stmt = stmt_vinfo->stmt;
3746 tree cond_expr, then_clause, else_clause;
3747 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3748 gimple *pattern_stmt, *def_stmt;
3749 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3750 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3751 bool promotion;
3752 tree comp_scalar_type;
3754 if (!is_gimple_assign (last_stmt)
3755 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3756 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3757 return NULL;
3759 cond_expr = gimple_assign_rhs1 (last_stmt);
3760 then_clause = gimple_assign_rhs2 (last_stmt);
3761 else_clause = gimple_assign_rhs3 (last_stmt);
3763 if (!COMPARISON_CLASS_P (cond_expr))
3764 return NULL;
3766 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3767 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3768 if (comp_vectype == NULL_TREE)
3769 return NULL;
3771 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3772 if (types_compatible_p (type, comp_scalar_type)
3773 || ((TREE_CODE (then_clause) != INTEGER_CST
3774 || TREE_CODE (else_clause) != INTEGER_CST)
3775 && !INTEGRAL_TYPE_P (comp_scalar_type))
3776 || !INTEGRAL_TYPE_P (type))
3777 return NULL;
3779 if ((TREE_CODE (then_clause) != INTEGER_CST
3780 && !type_conversion_p (vinfo, then_clause, false,
3781 &orig_type0, &def_stmt0, &promotion))
3782 || (TREE_CODE (else_clause) != INTEGER_CST
3783 && !type_conversion_p (vinfo, else_clause, false,
3784 &orig_type1, &def_stmt1, &promotion)))
3785 return NULL;
3787 if (orig_type0 && orig_type1
3788 && !types_compatible_p (orig_type0, orig_type1))
3789 return NULL;
3791 if (orig_type0)
3793 if (!types_compatible_p (orig_type0, comp_scalar_type))
3794 return NULL;
3795 then_clause = gimple_assign_rhs1 (def_stmt0);
3796 itype = orig_type0;
3799 if (orig_type1)
3801 if (!types_compatible_p (orig_type1, comp_scalar_type))
3802 return NULL;
3803 else_clause = gimple_assign_rhs1 (def_stmt1);
3804 itype = orig_type1;
3808 HOST_WIDE_INT cmp_mode_size
3809 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3811 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3812 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3813 return NULL;
3815 vectype = get_vectype_for_scalar_type (vinfo, type);
3816 if (vectype == NULL_TREE)
3817 return NULL;
3819 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3820 return NULL;
3822 if (itype == NULL_TREE)
3823 itype = build_nonstandard_integer_type (cmp_mode_size,
3824 TYPE_UNSIGNED (type));
3826 if (itype == NULL_TREE
3827 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3828 return NULL;
3830 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3831 if (vecitype == NULL_TREE)
3832 return NULL;
3834 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3835 return NULL;
3837 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3839 if ((TREE_CODE (then_clause) == INTEGER_CST
3840 && !int_fits_type_p (then_clause, itype))
3841 || (TREE_CODE (else_clause) == INTEGER_CST
3842 && !int_fits_type_p (else_clause, itype)))
3843 return NULL;
3846 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3847 COND_EXPR, unshare_expr (cond_expr),
3848 fold_convert (itype, then_clause),
3849 fold_convert (itype, else_clause));
3850 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3851 NOP_EXPR, gimple_assign_lhs (def_stmt));
3853 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3854 *type_out = vectype;
3856 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3858 return pattern_stmt;
3862 /* Helper function of vect_recog_bool_pattern. Called recursively; return
3863 true if bool VAR can and should be optimized that way. Assume it shouldn't
3864 in case it's a result of a comparison which can be directly vectorized into
3865 a vector comparison. Fills in STMTS with all stmts visited during the
3866 walk. */
3868 static bool
3869 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3871 tree rhs1;
3872 enum tree_code rhs_code;
3874 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3875 if (!def_stmt_info)
3876 return false;
3878 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3879 if (!def_stmt)
3880 return false;
3882 if (stmts.contains (def_stmt))
3883 return true;
3885 rhs1 = gimple_assign_rhs1 (def_stmt);
3886 rhs_code = gimple_assign_rhs_code (def_stmt);
3887 switch (rhs_code)
3889 case SSA_NAME:
3890 if (! check_bool_pattern (rhs1, vinfo, stmts))
3891 return false;
3892 break;
3894 CASE_CONVERT:
3895 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3896 return false;
3897 if (! check_bool_pattern (rhs1, vinfo, stmts))
3898 return false;
3899 break;
3901 case BIT_NOT_EXPR:
3902 if (! check_bool_pattern (rhs1, vinfo, stmts))
3903 return false;
3904 break;
3906 case BIT_AND_EXPR:
3907 case BIT_IOR_EXPR:
3908 case BIT_XOR_EXPR:
3909 if (! check_bool_pattern (rhs1, vinfo, stmts)
3910 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3911 return false;
3912 break;
3914 default:
3915 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3917 tree vecitype, comp_vectype;
3919 /* If the comparison can throw, then is_gimple_condexpr will be
3920 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3921 if (stmt_could_throw_p (cfun, def_stmt))
3922 return false;
3924 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3925 if (comp_vectype == NULL_TREE)
3926 return false;
3928 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3929 TREE_TYPE (rhs1));
3930 if (mask_type
3931 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3932 return false;
3934 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3936 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3937 tree itype
3938 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3939 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3940 if (vecitype == NULL_TREE)
3941 return false;
3943 else
3944 vecitype = comp_vectype;
3945 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3946 return false;
3948 else
3949 return false;
3950 break;
3953 bool res = stmts.add (def_stmt);
3954 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3955 gcc_assert (!res);
3957 return true;
3961 /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
3962 previous pattern stmt) to TYPE, appending the cast stmt to STMT_INFO's
3963 pattern sequence, and return the cast's lhs. */
3965 static tree
3966 adjust_bool_pattern_cast (vec_info *vinfo,
3967 tree type, tree var, stmt_vec_info stmt_info)
3969 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3970 NOP_EXPR, var);
3971 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3972 get_vectype_for_scalar_type (vinfo, type));
3973 return gimple_assign_lhs (cast_stmt);
3976 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3977 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3978 type, OUT_TYPE is the desired final integer type of the whole pattern.
3979 STMT_INFO is the info of the pattern root, with which the pattern stmts
3980 should be associated. DEFS is a map of pattern defs.
3982 static void
3983 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3984 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3986 gimple *stmt = SSA_NAME_DEF_STMT (var);
3987 enum tree_code rhs_code, def_rhs_code;
3988 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3989 location_t loc;
3990 gimple *pattern_stmt, *def_stmt;
3991 tree trueval = NULL_TREE;
3993 rhs1 = gimple_assign_rhs1 (stmt);
3994 rhs2 = gimple_assign_rhs2 (stmt);
3995 rhs_code = gimple_assign_rhs_code (stmt);
3996 loc = gimple_location (stmt);
3997 switch (rhs_code)
3999 case SSA_NAME:
4000 CASE_CONVERT:
4001 irhs1 = *defs.get (rhs1);
4002 itype = TREE_TYPE (irhs1);
4003 pattern_stmt
4004 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4005 SSA_NAME, irhs1);
4006 break;
4008 case BIT_NOT_EXPR:
4009 irhs1 = *defs.get (rhs1);
4010 itype = TREE_TYPE (irhs1);
4011 pattern_stmt
4012 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4013 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4014 break;
4016 case BIT_AND_EXPR:
4017 /* Try to optimize x = y & (a < b ? 1 : 0); into
4018 x = (a < b ? y : 0);
4020 E.g. for:
4021 bool a_b, b_b, c_b;
4022 TYPE d_T;
4024 S1 a_b = x1 CMP1 y1;
4025 S2 b_b = x2 CMP2 y2;
4026 S3 c_b = a_b & b_b;
4027 S4 d_T = (TYPE) c_b;
4029 we would normally emit:
4031 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4032 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4033 S3' c_T = a_T & b_T;
4034 S4' d_T = c_T;
4036 but we can save one stmt by using the
4037 result of one of the COND_EXPRs in the other COND_EXPR and leave
4038 BIT_AND_EXPR stmt out:
4040 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4041 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4042 S4' d_T = c_T;
4044 At least when VEC_COND_EXPR is implemented using masks
4045 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
4046 computes the comparison mask and ANDs it, in one case with an
4047 all-ones vector, in the other case with a vector register.
4048 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4049 often more expensive. */
4050 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4051 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4052 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4054 irhs1 = *defs.get (rhs1);
4055 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4056 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4057 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4059 rhs_code = def_rhs_code;
4060 rhs1 = def_rhs1;
4061 rhs2 = gimple_assign_rhs2 (def_stmt);
4062 trueval = irhs1;
4063 goto do_compare;
4065 else
4066 irhs2 = *defs.get (rhs2);
4067 goto and_ior_xor;
4069 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4070 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4071 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4073 irhs2 = *defs.get (rhs2);
4074 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4075 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4076 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4078 rhs_code = def_rhs_code;
4079 rhs1 = def_rhs1;
4080 rhs2 = gimple_assign_rhs2 (def_stmt);
4081 trueval = irhs2;
4082 goto do_compare;
4084 else
4085 irhs1 = *defs.get (rhs1);
4086 goto and_ior_xor;
4088 /* FALLTHRU */
4089 case BIT_IOR_EXPR:
4090 case BIT_XOR_EXPR:
4091 irhs1 = *defs.get (rhs1);
4092 irhs2 = *defs.get (rhs2);
4093 and_ior_xor:
4094 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4095 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4097 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4098 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4099 int out_prec = TYPE_PRECISION (out_type);
4100 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4101 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4102 stmt_info);
4103 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4104 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4105 stmt_info);
4106 else
4108 irhs1 = adjust_bool_pattern_cast (vinfo,
4109 out_type, irhs1, stmt_info);
4110 irhs2 = adjust_bool_pattern_cast (vinfo,
4111 out_type, irhs2, stmt_info);
4114 itype = TREE_TYPE (irhs1);
4115 pattern_stmt
4116 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4117 rhs_code, irhs1, irhs2);
4118 break;
4120 default:
4121 do_compare:
4122 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4123 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4124 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4125 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4126 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4128 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4129 itype
4130 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4132 else
4133 itype = TREE_TYPE (rhs1);
4134 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4135 if (trueval == NULL_TREE)
4136 trueval = build_int_cst (itype, 1);
4137 else
4138 gcc_checking_assert (useless_type_conversion_p (itype,
4139 TREE_TYPE (trueval)));
4140 pattern_stmt
4141 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4142 COND_EXPR, cond_expr, trueval,
4143 build_int_cst (itype, 0));
4144 break;
4147 gimple_set_location (pattern_stmt, loc);
4148 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4149 get_vectype_for_scalar_type (vinfo, itype));
4150 defs.put (var, gimple_assign_lhs (pattern_stmt));
4153 /* Comparison function to qsort a vector of gimple stmts by UID. */
4155 static int
4156 sort_after_uid (const void *p1, const void *p2)
4158 const gimple *stmt1 = *(const gimple * const *)p1;
4159 const gimple *stmt2 = *(const gimple * const *)p2;
4160 return gimple_uid (stmt1) - gimple_uid (stmt2);
4163 /* Create pattern stmts for all stmts participating in the bool pattern
4164 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4165 OUT_TYPE. Return the def of the pattern root. */
4167 static tree
4168 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4169 tree out_type, stmt_vec_info stmt_info)
4171 /* Gather original stmts in the bool pattern in their order of appearance
4172 in the IL. */
4173 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4174 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4175 i != bool_stmt_set.end (); ++i)
4176 bool_stmts.quick_push (*i);
4177 bool_stmts.qsort (sort_after_uid);
4179 /* Now process them in that order, producing pattern stmts. */
4180 hash_map <tree, tree> defs;
4181 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4182 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4183 out_type, stmt_info, defs);
4185 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4186 gimple *pattern_stmt
4187 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4188 return gimple_assign_lhs (pattern_stmt);
4191 /* Return the proper type for converting bool VAR into
4192 an integer value or NULL_TREE if no such type exists.
4193 The type is chosen so that the vector type of the converted
4194 value has the same number of elements as VAR's vector type.
4196 static tree
4197 integer_type_for_mask (tree var, vec_info *vinfo)
4199 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4200 return NULL_TREE;
4202 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4203 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4204 return NULL_TREE;
4206 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4209 /* Function vect_recog_bool_pattern
4211 Try to find pattern like following:
4213 bool a_b, b_b, c_b, d_b, e_b;
4214 TYPE f_T;
4215 loop:
4216 S1 a_b = x1 CMP1 y1;
4217 S2 b_b = x2 CMP2 y2;
4218 S3 c_b = a_b & b_b;
4219 S4 d_b = x3 CMP3 y3;
4220 S5 e_b = c_b | d_b;
4221 S6 f_T = (TYPE) e_b;
4223 where type 'TYPE' is an integral type. Or a similar pattern
4224 ending in
4226 S6 f_Y = e_b ? r_Y : s_Y;
4228 as results from if-conversion of a complex condition.
4230 Input:
4232 * STMT_VINFO: The stmt at the end from which the pattern
4233 search begins, i.e. cast of a bool to
4234 an integer type.
4236 Output:
4238 * TYPE_OUT: The type of the output of this pattern.
4240 * Return value: A new stmt that will be used to replace the pattern.
4242 Assuming the size of TYPE is the same as the size of all comparisons
4243 (otherwise some casts would be added where needed), for the above
4244 sequence we create the following related pattern stmts:
4245 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4246 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4247 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4248 S5' e_T = c_T | d_T;
4249 S6' f_T = e_T;
4251 Instead of the above S3' we could emit:
4252 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4253 S3' c_T = a_T | b_T;
4254 but the above is more efficient. */
4256 static gimple *
4257 vect_recog_bool_pattern (vec_info *vinfo,
4258 stmt_vec_info stmt_vinfo, tree *type_out)
4260 gimple *last_stmt = stmt_vinfo->stmt;
4261 enum tree_code rhs_code;
4262 tree var, lhs, rhs, vectype;
4263 gimple *pattern_stmt;
4265 if (!is_gimple_assign (last_stmt))
4266 return NULL;
4268 var = gimple_assign_rhs1 (last_stmt);
4269 lhs = gimple_assign_lhs (last_stmt);
4270 rhs_code = gimple_assign_rhs_code (last_stmt);
4272 if (rhs_code == VIEW_CONVERT_EXPR)
4273 var = TREE_OPERAND (var, 0);
4275 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4276 return NULL;
4278 hash_set<gimple *> bool_stmts;
4280 if (CONVERT_EXPR_CODE_P (rhs_code)
4281 || rhs_code == VIEW_CONVERT_EXPR)
4283 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4284 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4285 return NULL;
4286 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4288 if (check_bool_pattern (var, vinfo, bool_stmts))
4290 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4291 TREE_TYPE (lhs), stmt_vinfo);
4292 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4293 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4294 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4295 else
4296 pattern_stmt
4297 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4299 else
4301 tree type = integer_type_for_mask (var, vinfo);
4302 tree cst0, cst1, tmp;
4304 if (!type)
4305 return NULL;
4307 /* We may use the cond directly with a narrowed type, performing
4308 a single cond on the packed mask instead of multiple cond
4309 exprs followed by result packing. In the widening case it
4310 is better to do the cond first and then extract the
4311 results. */
4312 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4313 type = TREE_TYPE (lhs);
4315 cst0 = build_int_cst (type, 0);
4316 cst1 = build_int_cst (type, 1);
4317 tmp = vect_recog_temp_ssa_var (type, NULL);
4318 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4320 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4322 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4323 append_pattern_def_seq (vinfo, stmt_vinfo,
4324 pattern_stmt, new_vectype);
4326 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4327 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4331 *type_out = vectype;
4332 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4334 return pattern_stmt;
4336 else if (rhs_code == COND_EXPR
4337 && TREE_CODE (var) == SSA_NAME)
4339 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4340 if (vectype == NULL_TREE)
4341 return NULL;
4343 /* Build a scalar type for the boolean result that when
4344 vectorized matches the vector type of the result in
4345 size and number of elements. */
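/* Illustrative example (added): for a V4SI result vectype, PREC below becomes
   128 / 4 == 32, so the bool chain is rewritten to compute 32-bit 0/1 values
   whose vector type has the same number of elements as VECTYPE.  */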
4346 unsigned prec
4347 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4348 TYPE_VECTOR_SUBPARTS (vectype));
4350 tree type
4351 = build_nonstandard_integer_type (prec,
4352 TYPE_UNSIGNED (TREE_TYPE (var)));
4353 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4354 return NULL;
4356 if (!check_bool_pattern (var, vinfo, bool_stmts))
4357 return NULL;
4359 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4361 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4362 pattern_stmt
4363 = gimple_build_assign (lhs, COND_EXPR,
4364 build2 (NE_EXPR, boolean_type_node,
4365 rhs, build_int_cst (type, 0)),
4366 gimple_assign_rhs2 (last_stmt),
4367 gimple_assign_rhs3 (last_stmt));
4368 *type_out = vectype;
4369 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4371 return pattern_stmt;
4373 else if (rhs_code == SSA_NAME
4374 && STMT_VINFO_DATA_REF (stmt_vinfo))
4376 stmt_vec_info pattern_stmt_info;
4377 tree nunits_vectype;
4378 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4379 &nunits_vectype)
4380 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4381 return NULL;
4383 if (check_bool_pattern (var, vinfo, bool_stmts))
4384 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4385 TREE_TYPE (vectype), stmt_vinfo);
4386 else
4388 tree type = integer_type_for_mask (var, vinfo);
4389 tree cst0, cst1, new_vectype;
4391 if (!type)
4392 return NULL;
4394 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4395 type = TREE_TYPE (vectype);
4397 cst0 = build_int_cst (type, 0);
4398 cst1 = build_int_cst (type, 1);
4399 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4401 rhs = vect_recog_temp_ssa_var (type, NULL);
4402 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4403 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4406 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4407 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4409 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4410 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4411 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4412 rhs = rhs2;
4414 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4415 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4416 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4417 *type_out = vectype;
4418 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4420 return pattern_stmt;
4422 else
4423 return NULL;
4427 /* A helper for vect_recog_mask_conversion_pattern. Build
4428 conversion of MASK to a type suitable for masking VECTYPE.
4429 The built statement gets the required vectype and is appended
4430 to STMT_VINFO's pattern sequence.
4432 Return converted mask. */
4434 static tree
4435 build_mask_conversion (vec_info *vinfo,
4436 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4438 gimple *stmt;
4439 tree masktype, tmp;
4441 masktype = truth_type_for (vectype);
4442 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4443 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4444 append_pattern_def_seq (vinfo, stmt_vinfo,
4445 stmt, masktype, TREE_TYPE (vectype));
4447 return tmp;
4451 /* Function vect_recog_mask_conversion_pattern
4453 Try to find statements which require boolean type
4454 conversion. Additional conversion statements are
4455 added to handle such cases. For example:
4457 bool m_1, m_2, m_3;
4458 int i_4, i_5;
4459 double d_6, d_7;
4460 char c_1, c_2, c_3;
4462 S1 m_1 = i_4 > i_5;
4463 S2 m_2 = d_6 < d_7;
4464 S3 m_3 = m_1 & m_2;
4465 S4 c_1 = m_3 ? c_2 : c_3;
4467 Will be transformed into:
4469 S1 m_1 = i_4 > i_5;
4470 S2 m_2 = d_6 < d_7;
4471 S3'' m_2' = (_Bool[bitsize=32])m_2
4472 S3' m_3' = m_1 & m_2';
4473 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4474 S4' c_1' = m_3'' ? c_2 : c_3; */
4476 static gimple *
4477 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4478 stmt_vec_info stmt_vinfo, tree *type_out)
4480 gimple *last_stmt = stmt_vinfo->stmt;
4481 enum tree_code rhs_code;
4482 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4483 tree vectype1, vectype2;
4484 stmt_vec_info pattern_stmt_info;
4485 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4486 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4488 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4489 if (is_gimple_call (last_stmt)
4490 && gimple_call_internal_p (last_stmt))
4492 gcall *pattern_stmt;
4494 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4495 int mask_argno = internal_fn_mask_index (ifn);
4496 if (mask_argno < 0)
4497 return NULL;
4499 bool store_p = internal_store_fn_p (ifn);
4500 if (store_p)
4502 int rhs_index = internal_fn_stored_value_index (ifn);
4503 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4504 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4506 else
4508 lhs = gimple_call_lhs (last_stmt);
4509 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4512 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4513 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4514 if (!mask_arg_type)
4515 return NULL;
4516 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4518 if (!vectype1 || !vectype2
4519 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4520 TYPE_VECTOR_SUBPARTS (vectype2)))
4521 return NULL;
4523 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4525 auto_vec<tree, 8> args;
4526 unsigned int nargs = gimple_call_num_args (last_stmt);
4527 args.safe_grow (nargs, true);
4528 for (unsigned int i = 0; i < nargs; ++i)
4529 args[i] = ((int) i == mask_argno
4530 ? tmp
4531 : gimple_call_arg (last_stmt, i));
4532 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4534 if (!store_p)
4536 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4537 gimple_call_set_lhs (pattern_stmt, lhs);
4539 gimple_call_set_nothrow (pattern_stmt, true);
4541 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4542 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4543 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4545 *type_out = vectype1;
4546 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4548 return pattern_stmt;
4551 if (!is_gimple_assign (last_stmt))
4552 return NULL;
4554 gimple *pattern_stmt;
4555 lhs = gimple_assign_lhs (last_stmt);
4556 rhs1 = gimple_assign_rhs1 (last_stmt);
4557 rhs_code = gimple_assign_rhs_code (last_stmt);
4559 /* Check for cond expression requiring mask conversion. */
4560 if (rhs_code == COND_EXPR)
4562 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4564 if (TREE_CODE (rhs1) == SSA_NAME)
4566 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4567 if (!rhs1_type)
4568 return NULL;
4570 else if (COMPARISON_CLASS_P (rhs1))
4572 /* Check whether we're comparing scalar booleans and (if so)
4573 whether a better mask type exists than the mask associated
4574 with boolean-sized elements. This avoids unnecessary packs
4575 and unpacks if the booleans are set from comparisons of
4576 wider types. E.g. in:
4578 int x1, x2, x3, x4, y1, y2;
4580 bool b1 = (x1 == x2);
4581 bool b2 = (x3 == x4);
4582 ... = b1 == b2 ? y1 : y2;
4584 it is better for b1 and b2 to use the mask type associated
4585 with int elements rather than bool (byte) elements. */
4586 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4587 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4588 if (!rhs1_op0 || !rhs1_op1)
4589 return NULL;
4590 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4591 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4593 if (!rhs1_op0_type)
4594 rhs1_type = TREE_TYPE (rhs1_op0);
4595 else if (!rhs1_op1_type)
4596 rhs1_type = TREE_TYPE (rhs1_op1);
4597 else if (TYPE_PRECISION (rhs1_op0_type)
4598 != TYPE_PRECISION (rhs1_op1_type))
4600 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4601 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4602 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4603 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4604 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4606 if (abs (tmp0) > abs (tmp1))
4607 rhs1_type = rhs1_op1_type;
4608 else
4609 rhs1_type = rhs1_op0_type;
4611 else
4612 rhs1_type = build_nonstandard_integer_type
4613 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4615 else
4616 rhs1_type = rhs1_op0_type;
4618 else
4619 return NULL;
4621 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4623 if (!vectype1 || !vectype2)
4624 return NULL;
4626 /* Continue if a conversion is needed. Also continue if we have
4627 a comparison whose vector type would normally be different from
4628 VECTYPE2 when considered in isolation. In that case we'll
4629 replace the comparison with an SSA name (so that we can record
4630 its vector type) and behave as though the comparison was an SSA
4631 name from the outset. */
4632 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4633 TYPE_VECTOR_SUBPARTS (vectype2))
4634 && !rhs1_op0_type
4635 && !rhs1_op1_type)
4636 return NULL;
4638 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4639 in place, we can handle it in vectorizable_condition. This avoids
4640 unnecessary promotion stmts and increased vectorization factor. */
4641 if (COMPARISON_CLASS_P (rhs1)
4642 && INTEGRAL_TYPE_P (rhs1_type)
4643 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4644 TYPE_VECTOR_SUBPARTS (vectype2)))
4646 enum vect_def_type dt;
4647 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4648 && dt == vect_external_def
4649 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4650 && (dt == vect_external_def
4651 || dt == vect_constant_def))
4653 tree wide_scalar_type = build_nonstandard_integer_type
4654 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4655 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4656 wide_scalar_type);
4657 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4658 return NULL;
4662 /* If rhs1 is a comparison we need to move it into a
4663 separate statement. */
4664 if (TREE_CODE (rhs1) != SSA_NAME)
4666 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4667 if (rhs1_op0_type
4668 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4669 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4670 vectype2, stmt_vinfo);
4671 if (rhs1_op1_type
4672 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4673 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4674 vectype2, stmt_vinfo);
4675 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4676 rhs1_op0, rhs1_op1);
4677 rhs1 = tmp;
4678 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4679 rhs1_type);
4682 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4683 TYPE_VECTOR_SUBPARTS (vectype2)))
4684 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4685 else
4686 tmp = rhs1;
4688 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4689 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4690 gimple_assign_rhs2 (last_stmt),
4691 gimple_assign_rhs3 (last_stmt));
4693 *type_out = vectype1;
4694 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4696 return pattern_stmt;
4699 /* Now check for binary boolean operations requiring conversion for
4700 one of the operands. */
4701 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4702 return NULL;
4704 if (rhs_code != BIT_IOR_EXPR
4705 && rhs_code != BIT_XOR_EXPR
4706 && rhs_code != BIT_AND_EXPR
4707 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4708 return NULL;
4710 rhs2 = gimple_assign_rhs2 (last_stmt);
4712 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4713 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4715 if (!rhs1_type || !rhs2_type
4716 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4717 return NULL;
4719 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4721 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4722 if (!vectype1)
4723 return NULL;
4724 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4726 else
4728 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4729 if (!vectype1)
4730 return NULL;
4731 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4734 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4735 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4737 *type_out = vectype1;
4738 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4740 return pattern_stmt;
4743 /* STMT_INFO is a load or store. If the load or store is conditional, return
4744 the boolean condition under which it occurs, otherwise return null. */
4746 static tree
4747 vect_get_load_store_mask (stmt_vec_info stmt_info)
4749 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4751 gcc_assert (gimple_assign_single_p (def_assign));
4752 return NULL_TREE;
4755 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4757 internal_fn ifn = gimple_call_internal_fn (def_call);
4758 int mask_index = internal_fn_mask_index (ifn);
4759 return gimple_call_arg (def_call, mask_index);
4762 gcc_unreachable ();
4765 /* Return MASK if MASK is suitable for masking an operation on vectors
4766 of type VECTYPE, otherwise convert it into such a form and return
4767 the result. Associate any conversion statements with STMT_INFO's
4768 pattern. */
4770 static tree
4771 vect_convert_mask_for_vectype (tree mask, tree vectype,
4772 stmt_vec_info stmt_info, vec_info *vinfo)
4774 tree mask_type = integer_type_for_mask (mask, vinfo);
4775 if (mask_type)
4777 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4778 if (mask_vectype
4779 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4780 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4781 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4783 return mask;
4786 /* Return the equivalent of:
4788 fold_convert (TYPE, VALUE)
4790 with the expectation that the operation will be vectorized.
4791 If new statements are needed, add them as pattern statements
4792 to STMT_INFO. */
4794 static tree
4795 vect_add_conversion_to_pattern (vec_info *vinfo,
4796 tree type, tree value, stmt_vec_info stmt_info)
4798 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4799 return value;
4801 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4802 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4803 append_pattern_def_seq (vinfo, stmt_info, conversion,
4804 get_vectype_for_scalar_type (vinfo, type));
4805 return new_value;
4808 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4809 internal function. Return the final statement on success and set
4810 *TYPE_OUT to the vector type being loaded or stored.
4812 This function only handles gathers and scatters that were recognized
4813 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
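/* Illustrative sketch (added; the call syntax follows the internal-fn dump
   convention and is not taken from an actual dump): a conditional gather
   load such as

     x_1 = mask_2 ? base_p[idx_3] : 0;

   becomes a pattern call along the lines of

     x_1 = .MASK_GATHER_LOAD (base_p, idx_3, scale, 0, mask_2);

   while an unconditional scatter store becomes roughly

     .SCATTER_STORE (base_p, idx_3, scale, rhs_4);  */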
4815 static gimple *
4816 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4817 stmt_vec_info stmt_info, tree *type_out)
4819 /* Currently we only support this for loop vectorization. */
4820 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4821 if (!loop_vinfo)
4822 return NULL;
4824 /* Make sure that we're looking at a gather load or scatter store. */
4825 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4826 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4827 return NULL;
4829 /* Get the boolean that controls whether the load or store happens.
4830 This is null if the operation is unconditional. */
4831 tree mask = vect_get_load_store_mask (stmt_info);
4833 /* Make sure that the target supports an appropriate internal
4834 function for the gather/scatter operation. */
4835 gather_scatter_info gs_info;
4836 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4837 || gs_info.ifn == IFN_LAST)
4838 return NULL;
4840 /* Convert the mask to the right form. */
4841 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4842 gs_info.element_type);
4843 if (mask)
4844 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4845 loop_vinfo);
4846 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4847 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4848 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4850 /* Get the invariant base and non-invariant offset, converting the
4851 latter to the same width as the vector elements. */
4852 tree base = gs_info.base;
4853 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4854 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4855 gs_info.offset, stmt_info);
4857 /* Build the new pattern statement. */
4858 tree scale = size_int (gs_info.scale);
4859 gcall *pattern_stmt;
4860 if (DR_IS_READ (dr))
4862 tree zero = build_zero_cst (gs_info.element_type);
4863 if (mask != NULL)
4864 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4865 offset, scale, zero, mask);
4866 else
4867 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4868 offset, scale, zero);
4869 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4870 gimple_call_set_lhs (pattern_stmt, load_lhs);
4872 else
4874 tree rhs = vect_get_store_rhs (stmt_info);
4875 if (mask != NULL)
4876 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
4877 base, offset, scale, rhs,
4878 mask);
4879 else
4880 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
4881 base, offset, scale, rhs);
4883 gimple_call_set_nothrow (pattern_stmt, true);
4885 /* Copy across relevant vectorization info and associate DR with the
4886 new pattern statement instead of the original statement. */
4887 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4888 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4890 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4891 *type_out = vectype;
4892 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4894 return pattern_stmt;
4897 /* Return true if TYPE is a non-boolean integer type. These are the types
4898 that we want to consider for narrowing. */
4900 static bool
4901 vect_narrowable_type_p (tree type)
4903 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4906 /* Return true if the operation given by CODE can be truncated to N bits
4907 when only N bits of the output are needed. This is only true if bit N+1
4908 of the inputs has no effect on the low N bits of the result. */
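/* For example (illustrative): the low 8 bits of a + b depend only on the
   low 8 bits of a and b, so PLUS_EXPR can be truncated to 8 bits when only
   8 output bits are needed; the low 8 bits of a >> 1 also depend on bit 8
   of a, so RSHIFT_EXPR cannot.  */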
4910 static bool
4911 vect_truncatable_operation_p (tree_code code)
4913 switch (code)
4915 case PLUS_EXPR:
4916 case MINUS_EXPR:
4917 case MULT_EXPR:
4918 case BIT_AND_EXPR:
4919 case BIT_IOR_EXPR:
4920 case BIT_XOR_EXPR:
4921 case COND_EXPR:
4922 return true;
4924 default:
4925 return false;
4929 /* Record that STMT_INFO could be changed from operating on TYPE to
4930 operating on a type with the precision and sign given by PRECISION
4931 and SIGN respectively. PRECISION is an arbitrary bit precision;
4932 it might not be a whole number of bytes. */
4934 static void
4935 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4936 unsigned int precision, signop sign)
4938 /* Round the precision up to a whole number of bytes. */
4939 precision = vect_element_precision (precision);
4940 if (precision < TYPE_PRECISION (type)
4941 && (!stmt_info->operation_precision
4942 || stmt_info->operation_precision > precision))
4944 stmt_info->operation_precision = precision;
4945 stmt_info->operation_sign = sign;
4949 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4950 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4951 is an arbitrary bit precision; it might not be a whole number of bytes. */
4953 static void
4954 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4955 unsigned int min_input_precision)
4957 /* This operation in isolation only requires the inputs to have
4958 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4959 that MIN_INPUT_PRECISION is a natural precision for the chain
4960 as a whole. E.g. consider something like:
4962 unsigned short *x, *y;
4963 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4965 The right shift can be done on unsigned chars, and only requires the
4966 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4967 approach would mean turning a natural chain of single-vector unsigned
4968 short operations into one that truncates "*x" and then extends
4969 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4970 operation and one vector for each unsigned char operation.
4971 This would be a significant pessimization.
4973 Instead only propagate the maximum of this precision and the precision
4974 required by the users of the result. This means that we don't pessimize
4975 the case above but continue to optimize things like:
4977 unsigned char *y;
4978 unsigned short *x;
4979 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4981 Here we would truncate two vectors of *x to a single vector of
4982 unsigned chars and use single-vector unsigned char operations for
4983 everything else, rather than doing two unsigned short copies of
4984 "(*x & 0xf0) >> 4" and then truncating the result. */
4985 min_input_precision = MAX (min_input_precision,
4986 stmt_info->min_output_precision);
4988 if (min_input_precision < TYPE_PRECISION (type)
4989 && (!stmt_info->min_input_precision
4990 || stmt_info->min_input_precision > min_input_precision))
4991 stmt_info->min_input_precision = min_input_precision;
4994 /* Subroutine of vect_determine_min_output_precision. Return true if
4995 we can calculate a reduced number of output bits for STMT_INFO,
4996 whose result is LHS. */
4998 static bool
4999 vect_determine_min_output_precision_1 (vec_info *vinfo,
5000 stmt_vec_info stmt_info, tree lhs)
5002 /* Take the maximum precision required by users of the result. */
5003 unsigned int precision = 0;
5004 imm_use_iterator iter;
5005 use_operand_p use;
5006 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5008 gimple *use_stmt = USE_STMT (use);
5009 if (is_gimple_debug (use_stmt))
5010 continue;
5011 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5012 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5013 return false;
5014 /* The input precision recorded for COND_EXPRs applies only to the
5015 "then" and "else" values. */
5016 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5017 if (assign
5018 && gimple_assign_rhs_code (assign) == COND_EXPR
5019 && use->use != gimple_assign_rhs2_ptr (assign)
5020 && use->use != gimple_assign_rhs3_ptr (assign))
5021 return false;
5022 precision = MAX (precision, use_stmt_info->min_input_precision);
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_NOTE, vect_location,
5027 "only the low %d bits of %T are significant\n",
5028 precision, lhs);
5029 stmt_info->min_output_precision = precision;
5030 return true;
5033 /* Calculate min_output_precision for STMT_INFO. */
5035 static void
5036 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5038 /* We're only interested in statements with a narrowable result. */
5039 tree lhs = gimple_get_lhs (stmt_info->stmt);
5040 if (!lhs
5041 || TREE_CODE (lhs) != SSA_NAME
5042 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5043 return;
5045 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5046 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5049 /* Use range information to decide whether STMT (described by STMT_INFO)
5050 could be done in a narrower type. This is effectively a forward
5051 propagation, since it uses context-independent information that applies
5052 to all users of an SSA name. */
5054 static void
5055 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5057 tree lhs = gimple_assign_lhs (stmt);
5058 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5059 return;
5061 tree type = TREE_TYPE (lhs);
5062 if (!vect_narrowable_type_p (type))
5063 return;
5065 /* First see whether we have any useful range information for the result. */
5066 unsigned int precision = TYPE_PRECISION (type);
5067 signop sign = TYPE_SIGN (type);
5068 wide_int min_value, max_value;
5069 if (!vect_get_range_info (lhs, &min_value, &max_value))
5070 return;
5072 tree_code code = gimple_assign_rhs_code (stmt);
5073 unsigned int nops = gimple_num_ops (stmt);
5075 if (!vect_truncatable_operation_p (code))
5076 /* Check that all relevant input operands are compatible, and update
5077 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5078 for (unsigned int i = 1; i < nops; ++i)
5080 tree op = gimple_op (stmt, i);
5081 if (TREE_CODE (op) == INTEGER_CST)
5083 /* Don't require the integer to have RHS_TYPE (which it might
5084 not for things like shift amounts, etc.), but do require it
5085 to fit the type. */
5086 if (!int_fits_type_p (op, type))
5087 return;
5089 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5090 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5092 else if (TREE_CODE (op) == SSA_NAME)
5094 /* Ignore codes that don't take uniform arguments. */
5095 if (!types_compatible_p (TREE_TYPE (op), type))
5096 return;
5098 wide_int op_min_value, op_max_value;
5099 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5100 return;
5102 min_value = wi::min (min_value, op_min_value, sign);
5103 max_value = wi::max (max_value, op_max_value, sign);
5105 else
5106 return;
5109 /* Try to switch signed types for unsigned types if we can.
5110 This is better for two reasons. First, unsigned ops tend
5111 to be cheaper than signed ops. Second, it means that we can
5112 handle things like:
5114 signed char c;
5115 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5119 signed char c;
5120 unsigned short res_1 = (unsigned short) c & 0xff00;
5121 int res = (int) res_1;
5123 where the intermediate result res_1 has unsigned rather than
5124 signed type. */
5125 if (sign == SIGNED && !wi::neg_p (min_value))
5126 sign = UNSIGNED;
5128 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5129 unsigned int precision1 = wi::min_precision (min_value, sign);
5130 unsigned int precision2 = wi::min_precision (max_value, sign);
5131 unsigned int value_precision = MAX (precision1, precision2);
5132 if (value_precision >= precision)
5133 return;
5135 if (dump_enabled_p ())
5136 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5137 " without loss of precision: %G",
5138 sign == SIGNED ? "signed" : "unsigned",
5139 value_precision, stmt);
5141 vect_set_operation_type (stmt_info, type, value_precision, sign);
5142 vect_set_min_input_precision (stmt_info, type, value_precision);
5145 /* Use information about the users of STMT's result to decide whether
5146 STMT (described by STMT_INFO) could be done in a narrower type.
5147 This is effectively a backward propagation. */
5149 static void
5150 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5152 tree_code code = gimple_assign_rhs_code (stmt);
5153 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5154 tree type = TREE_TYPE (gimple_op (stmt, opno));
5155 if (!vect_narrowable_type_p (type))
5156 return;
5158 unsigned int precision = TYPE_PRECISION (type);
5159 unsigned int operation_precision, min_input_precision;
5160 switch (code)
5162 CASE_CONVERT:
5163 /* Only the bits that contribute to the output matter. Don't change
5164 the precision of the operation itself. */
5165 operation_precision = precision;
5166 min_input_precision = stmt_info->min_output_precision;
5167 break;
5169 case LSHIFT_EXPR:
5170 case RSHIFT_EXPR:
5172 tree shift = gimple_assign_rhs2 (stmt);
5173 if (TREE_CODE (shift) != INTEGER_CST
5174 || !wi::ltu_p (wi::to_widest (shift), precision))
5175 return;
5176 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5177 if (code == LSHIFT_EXPR)
5179 /* Avoid creating an undefined shift.
5181 ??? We could instead use min_output_precision as-is and
5182 optimize out-of-range shifts to zero. However, only
5183 degenerate testcases shift away all their useful input data,
5184 and it isn't natural to drop input operations in the middle
5185 of vectorization. This sort of thing should really be
5186 handled before vectorization. */
5187 operation_precision = MAX (stmt_info->min_output_precision,
5188 const_shift + 1);
5189 /* We need CONST_SHIFT fewer bits of the input. */
5190 min_input_precision = (MAX (operation_precision, const_shift)
5191 - const_shift);
5193 else
5195 /* We need CONST_SHIFT extra bits to do the operation. */
5196 operation_precision = (stmt_info->min_output_precision
5197 + const_shift);
5198 min_input_precision = operation_precision;
5200 break;
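/* Illustrative example (added): if only 8 output bits of x >> 3 are needed,
   the shift itself must be done in 8 + 3 == 11 input bits (rounded up to a
   whole number of bytes later), whereas x << 3 with 8 significant output
   bits needs only 8 - 3 == 5 bits of input.  */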
5203 default:
5204 if (vect_truncatable_operation_p (code))
5206 /* Input bit N has no effect on output bits N-1 and lower. */
5207 operation_precision = stmt_info->min_output_precision;
5208 min_input_precision = operation_precision;
5209 break;
5211 return;
5214 if (operation_precision < precision)
5216 if (dump_enabled_p ())
5217 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5218 " without affecting users: %G",
5219 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5220 operation_precision, stmt);
5221 vect_set_operation_type (stmt_info, type, operation_precision,
5222 TYPE_SIGN (type));
5224 vect_set_min_input_precision (stmt_info, type, min_input_precision);
5227 /* Return true if the statement described by STMT_INFO sets a boolean
5228 SSA_NAME and if we know how to vectorize this kind of statement using
5229 vector mask types. */
5231 static bool
5232 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5234 tree lhs = gimple_get_lhs (stmt_info->stmt);
5235 if (!lhs
5236 || TREE_CODE (lhs) != SSA_NAME
5237 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5238 return false;
5240 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5242 tree_code rhs_code = gimple_assign_rhs_code (assign);
5243 switch (rhs_code)
5245 CASE_CONVERT:
5246 case SSA_NAME:
5247 case BIT_NOT_EXPR:
5248 case BIT_IOR_EXPR:
5249 case BIT_XOR_EXPR:
5250 case BIT_AND_EXPR:
5251 return true;
5253 default:
5254 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5257 else if (is_a <gphi *> (stmt_info->stmt))
5258 return true;
5259 return false;
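/* Illustrative example, not part of the GCC sources (the SSA names are
   hypothetical): in a sequence such as

     _1 = x_5 < y_6;
     _2 = _1 & flag_7;

   where _1, _2 and flag_7 are boolean SSA names, both statements satisfy
   the checks above (a comparison and a BIT_AND_EXPR with a boolean lhs)
   and so both are candidates for being carried out on vector masks.  */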
5262 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5263 a vector mask type instead of a normal vector type. Record the
5264 result in STMT_INFO->mask_precision. */
5266 static void
5267 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5269 if (!possible_vector_mask_operation_p (stmt_info))
5270 return;
5272 /* If at least one boolean input uses a vector mask type,
5273 pick the mask type with the narrowest elements.
5275 ??? This is the traditional behavior. It should always produce
5276 the smallest number of operations, but isn't necessarily the
5277 optimal choice. For example, if we have:
5279 a = b & c
5281 where:
5283 - the user of a wants it to have a mask type for 16-bit elements (M16)
5284 - b also uses M16
5285 - c uses a mask type for 8-bit elements (M8)
5287 then picking M8 gives:
5289 - 1 M16->M8 pack for b
5290 - 1 M8 AND for a
5291 - 2 M8->M16 unpacks for the user of a
5293 whereas picking M16 would have given:
5295 - 2 M8->M16 unpacks for c
5296 - 2 M16 ANDs for a
5298 The number of operations is the same, but M16 would have given
5299 a shorter dependency chain and allowed more ILP. */
5300 unsigned int precision = ~0U;
5301 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5303 unsigned int nops = gimple_num_ops (assign);
5304 for (unsigned int i = 1; i < nops; ++i)
5306 tree rhs = gimple_op (assign, i);
5307 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5308 continue;
5310 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5311 if (!def_stmt_info)
5312 /* Don't let external or constant operands influence the choice.
5313 We can convert them to whichever vector type we pick. */
5314 continue;
5316 if (def_stmt_info->mask_precision)
5318 if (precision > def_stmt_info->mask_precision)
5319 precision = def_stmt_info->mask_precision;
5323 /* If the statement compares two values that shouldn't use vector masks,
5324 try comparing the values as normal scalars instead. */
5325 tree_code rhs_code = gimple_assign_rhs_code (assign);
5326 if (precision == ~0U
5327 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5329 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5330 scalar_mode mode;
5331 tree vectype, mask_type;
5332 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5333 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5334 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5335 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5336 precision = GET_MODE_BITSIZE (mode);
5339 else
5341 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5342 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5344 tree rhs = gimple_phi_arg_def (phi, i);
5346 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5347 if (!def_stmt_info)
5348 /* Don't let external or constant operands influence the choice.
5349 We can convert them to whichever vector type we pick. */
5350 continue;
5352 if (def_stmt_info->mask_precision)
5354 if (precision > def_stmt_info->mask_precision)
5355 precision = def_stmt_info->mask_precision;
5360 if (dump_enabled_p ())
5362 if (precision == ~0U)
5363 dump_printf_loc (MSG_NOTE, vect_location,
5364 "using normal nonmask vectors for %G",
5365 stmt_info->stmt);
5366 else
5367 dump_printf_loc (MSG_NOTE, vect_location,
5368 "using boolean precision %d for %G",
5369 precision, stmt_info->stmt);
5372 stmt_info->mask_precision = precision;
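/* Illustrative example, not part of the GCC sources (the target capability
   is an assumption): for

     _1 = a_2 < b_3;

   where a_2 and b_3 are 32-bit integers and no boolean operand already has
   a recorded mask precision, the fallback above asks whether the target can
   compare the corresponding integer vectors directly; if it can, the mask
   precision becomes GET_MODE_BITSIZE of that scalar mode, i.e. 32.  If no
   precision can be derived at all, ~0U is recorded and the dump notes that
   normal non-mask vectors will be used.  */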
5375 /* Handle vect_determine_precisions for STMT_INFO, given that we
5376 have already done so for the users of its result. */
5378 void
5379 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5381 vect_determine_min_output_precision (vinfo, stmt_info);
5382 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5384 vect_determine_precisions_from_range (stmt_info, stmt);
5385 vect_determine_precisions_from_users (stmt_info, stmt);
5389 /* Walk backwards through the vectorizable region to determine the
5390 values of these fields:
5392 - min_output_precision
5393 - min_input_precision
5394 - operation_precision
5395 - operation_sign. */
5397 void
5398 vect_determine_precisions (vec_info *vinfo)
5400 DUMP_VECT_SCOPE ("vect_determine_precisions");
5402 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5404 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5405 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5406 unsigned int nbbs = loop->num_nodes;
5408 for (unsigned int i = 0; i < nbbs; i++)
5410 basic_block bb = bbs[i];
5411 for (auto gsi = gsi_start_phis (bb);
5412 !gsi_end_p (gsi); gsi_next (&gsi))
5414 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5415 if (stmt_info)
5416 vect_determine_mask_precision (vinfo, stmt_info);
5418 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5419 if (!is_gimple_debug (gsi_stmt (si)))
5420 vect_determine_mask_precision
5421 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5423 for (unsigned int i = 0; i < nbbs; i++)
5425 basic_block bb = bbs[nbbs - i - 1];
5426 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5427 !gsi_end_p (si); gsi_prev (&si))
5428 if (!is_gimple_debug (gsi_stmt (si)))
5429 vect_determine_stmt_precisions
5430 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5431 for (auto gsi = gsi_start_phis (bb);
5432 !gsi_end_p (gsi); gsi_next (&gsi))
5434 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5435 if (stmt_info)
5436 vect_determine_stmt_precisions (vinfo, stmt_info);
5440 else
5442 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5443 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5445 basic_block bb = bb_vinfo->bbs[i];
5446 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5448 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5449 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5450 vect_determine_mask_precision (vinfo, stmt_info);
5452 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5454 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5455 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5456 vect_determine_mask_precision (vinfo, stmt_info);
5459 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5461 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5462 !gsi_end_p (gsi); gsi_prev (&gsi))
5464 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5465 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5466 vect_determine_stmt_precisions (vinfo, stmt_info);
5468 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5469 !gsi_end_p (gsi); gsi_next (&gsi))
5471 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5472 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5473 vect_determine_stmt_precisions (vinfo, stmt_info);
5479 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5481 struct vect_recog_func
5483 vect_recog_func_ptr fn;
5484 const char *name;
5487 /* Note that ordering matters - the first pattern that matches a stmt is
5488 taken, which means the more complex patterns usually need to precede the
5489 less complex ones (widen_sum only after dot_prod or sad, for example). */
5490 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5491 { vect_recog_over_widening_pattern, "over_widening" },
5492 /* Must come after over_widening, which narrows the shift as much as
5493 possible beforehand. */
5494 { vect_recog_average_pattern, "average" },
5495 { vect_recog_mulhs_pattern, "mult_high" },
5496 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5497 { vect_recog_widen_mult_pattern, "widen_mult" },
5498 { vect_recog_dot_prod_pattern, "dot_prod" },
5499 { vect_recog_sad_pattern, "sad" },
5500 { vect_recog_widen_sum_pattern, "widen_sum" },
5501 { vect_recog_pow_pattern, "pow" },
5502 { vect_recog_popcount_pattern, "popcount" },
5503 { vect_recog_widen_shift_pattern, "widen_shift" },
5504 { vect_recog_rotate_pattern, "rotate" },
5505 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5506 { vect_recog_divmod_pattern, "divmod" },
5507 { vect_recog_mult_pattern, "mult" },
5508 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5509 { vect_recog_bool_pattern, "bool" },
5510 /* This must come before mask conversion, and includes the parts
5511 of mask conversion that are needed for gather and scatter
5512 internal functions. */
5513 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5514 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5515 { vect_recog_widen_plus_pattern, "widen_plus" },
5516 { vect_recog_widen_minus_pattern, "widen_minus" },
5519 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
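/* Illustrative examples, not part of the GCC sources (narrow unsigned char
   inputs and a wider accumulator or result are assumed), of scalar idioms
   that some of the entries above target:

     sum += a[i] * b[i];                    dot_prod
     sum += abs (a[i] - b[i]);              sad
     res[i] = (a[i] + b[i] + 1) >> 1;       average (rounding)

   In some forms the final accumulation of a dot product or SAD could also
   be matched by the more general widen_sum pattern, which is why dot_prod
   and sad are listed before widen_sum, as the ordering note above says.  */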
5521 /* Mark statements that are involved in a pattern. */
5523 void
5524 vect_mark_pattern_stmts (vec_info *vinfo,
5525 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5526 tree pattern_vectype)
5528 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5529 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5531 gimple *orig_pattern_stmt = NULL;
5532 if (is_pattern_stmt_p (orig_stmt_info))
5534 /* We're replacing a statement in an existing pattern definition
5535 sequence. */
5536 orig_pattern_stmt = orig_stmt_info->stmt;
5537 if (dump_enabled_p ())
5538 dump_printf_loc (MSG_NOTE, vect_location,
5539 "replacing earlier pattern %G", orig_pattern_stmt);
5541 /* To keep the book-keeping simple, just swap the lhs of the
5542 old and new statements, so that the old one has a valid but
5543 unused lhs. */
5544 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5545 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5546 gimple_set_lhs (pattern_stmt, old_lhs);
5548 if (dump_enabled_p ())
5549 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5551 /* Switch to the statement that ORIG replaces. */
5552 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5554 /* We shouldn't be replacing the main pattern statement. */
5555 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5556 != orig_pattern_stmt);
5559 if (def_seq)
5560 for (gimple_stmt_iterator si = gsi_start (def_seq);
5561 !gsi_end_p (si); gsi_next (&si))
5563 if (dump_enabled_p ())
5564 dump_printf_loc (MSG_NOTE, vect_location,
5565 "extra pattern stmt: %G", gsi_stmt (si));
5566 stmt_vec_info pattern_stmt_info
5567 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5568 orig_stmt_info, pattern_vectype);
5569 /* Stmts in the def sequence are not vectorizable cycle or
5570 induction defs; instead they should all be vect_internal_def,
5571 feeding the main pattern stmt, which retains this def type. */
5572 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5575 if (orig_pattern_stmt)
5577 vect_init_pattern_stmt (vinfo, pattern_stmt,
5578 orig_stmt_info, pattern_vectype);
5580 /* Insert all the new pattern statements before the original one. */
5581 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5582 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5583 orig_def_seq);
5584 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5585 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5587 /* Remove the pattern statement that this new pattern replaces. */
5588 gsi_remove (&gsi, false);
5590 else
5591 vect_set_pattern_stmt (vinfo,
5592 pattern_stmt, orig_stmt_info, pattern_vectype);
5594 /* Transfer reduction path info to the pattern. */
5595 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5597 gimple_match_op op;
5598 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
5599 gcc_unreachable ();
5600 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
5601 /* Search the pattern def sequence and the main pattern stmt. Note
5602 that we may have inserted everything into a containing pattern def
5603 sequence, so the following is a bit awkward. */
5604 gimple_stmt_iterator si;
5605 gimple *s;
5606 if (def_seq)
5608 si = gsi_start (def_seq);
5609 s = gsi_stmt (si);
5610 gsi_next (&si);
5612 else
5614 si = gsi_none ();
5615 s = pattern_stmt;
5619 bool found = false;
5620 if (gimple_extract_op (s, &op))
5621 for (unsigned i = 0; i < op.num_ops; ++i)
5622 if (op.ops[i] == lookfor)
5624 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
5625 lookfor = gimple_get_lhs (s);
5626 found = true;
5627 break;
5629 if (s == pattern_stmt)
5631 if (!found && dump_enabled_p ())
5632 dump_printf_loc (MSG_NOTE, vect_location,
5633 "failed to update reduction index.\n");
5634 break;
5636 if (gsi_end_p (si))
5637 s = pattern_stmt;
5638 else
5640 s = gsi_stmt (si);
5641 if (s == pattern_stmt)
5642 /* Found the end inside a bigger pattern def seq. */
5643 si = gsi_none ();
5644 else
5645 gsi_next (&si);
5647 } while (1);
5651 /* Function vect_pattern_recog_1
5653 Input:
5654 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5655 computation pattern.
5656 STMT_INFO: A stmt from which the pattern search should start.
5658 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5659 a sequence of statements that has the same functionality and can be
5660 used to replace STMT_INFO. It returns the last statement in the sequence
5661 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5662 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5663 statement, having first checked that the target supports the new operation
5664 in that type.
5666 This function also does some bookkeeping, as explained in the documentation
5667 for vect_pattern_recog. */
5669 static void
5670 vect_pattern_recog_1 (vec_info *vinfo,
5671 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5673 gimple *pattern_stmt;
5674 loop_vec_info loop_vinfo;
5675 tree pattern_vectype;
5677 /* If this statement has already been replaced with pattern statements,
5678 leave the original statement alone, since the first match wins.
5679 Instead try to match against the definition statements that feed
5680 the main pattern statement. */
5681 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5683 gimple_stmt_iterator gsi;
5684 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5685 !gsi_end_p (gsi); gsi_next (&gsi))
5686 vect_pattern_recog_1 (vinfo, recog_func,
5687 vinfo->lookup_stmt (gsi_stmt (gsi)));
5688 return;
5691 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5692 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5693 if (!pattern_stmt)
5695 /* Clear any half-formed pattern definition sequence. */
5696 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5697 return;
5700 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5702 /* Found a vectorizable pattern. */
5703 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_NOTE, vect_location,
5705 "%s pattern recognized: %G",
5706 recog_func->name, pattern_stmt);
5708 /* Mark the stmts that are involved in the pattern. */
5709 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5711 /* Patterns cannot be vectorized using SLP, because they change the order of
5712 computation. */
5713 if (loop_vinfo)
5715 unsigned ix, ix2;
5716 stmt_vec_info *elem_ptr;
5717 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5718 elem_ptr, *elem_ptr == stmt_info);
5723 /* Function vect_pattern_recog
5725 Input:
5726 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
5727 computation idioms.
5729 Output - for each computation idiom that is detected we create a new stmt
5730 that provides the same functionality and that can be vectorized. We
5731 also record some information in the stmt_vec_info of the relevant
5732 stmts, as explained below:
5734 At the entry to this function we have the following stmts, with the
5735 following initial value in the STMT_VINFO fields:
5737 stmt in_pattern_p related_stmt vec_stmt
5738 S1: a_i = .... - - -
5739 S2: a_2 = ..use(a_i).. - - -
5740 S3: a_1 = ..use(a_2).. - - -
5741 S4: a_0 = ..use(a_1).. - - -
5742 S5: ... = ..use(a_0).. - - -
5744 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5745 represented by a single stmt. We then:
5746 - create a new stmt S6 equivalent to the pattern (the stmt is not
5747 inserted into the code)
5748 - fill in the STMT_VINFO fields as follows:
5750 in_pattern_p related_stmt vec_stmt
5751 S1: a_i = .... - - -
5752 S2: a_2 = ..use(a_i).. - - -
5753 S3: a_1 = ..use(a_2).. - - -
5754 S4: a_0 = ..use(a_1).. true S6 -
5755 '---> S6: a_new = .... - S4 -
5756 S5: ... = ..use(a_0).. - - -
5758 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5759 to each other through the RELATED_STMT field).
5761 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5762 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5763 remain irrelevant unless used by stmts other than S4.
5765 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5766 (because they are marked as irrelevant). It will vectorize S6, and record
5767 a pointer to the new vector stmt VS6 from S6 (as usual).
5768 S4 will be skipped, and S5 will be vectorized as usual:
5770 in_pattern_p related_stmt vec_stmt
5771 S1: a_i = .... - - -
5772 S2: a_2 = ..use(a_i).. - - -
5773 S3: a_1 = ..use(a_2).. - - -
5774 > VS6: va_new = .... - - -
5775 S4: a_0 = ..use(a_1).. true S6 VS6
5776 '---> S6: a_new = .... - S4 VS6
5777 > VS5: ... = ..vuse(va_new).. - - -
5778 S5: ... = ..use(a_0).. - - -
5780 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5781 elsewhere), and we'll end up with:
5783 VS6: va_new = ....
5784 VS5: ... = ..vuse(va_new)..
5786 In the case of more than one pattern statement, e.g. widen-mult with an
5787 intermediate type:
5789 S1 a_t = ;
5790 S2 a_T = (TYPE) a_t;
5791 '--> S3: a_it = (interm_type) a_t;
5792 S4 prod_T = a_T * CONST;
5793 '--> S5: prod_T' = a_it w* CONST;
5795 there may be other users of a_T outside the pattern. In that case S2 will
5796 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5797 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5798 be recorded in S3. */
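/* Illustrative example, not part of the GCC sources (the types and the
   target's capabilities are assumptions): the multi-statement widen-mult
   case sketched just above can arise from source like

     signed char a;  int prod = a * 37;

   If the target's widening multiply only doubles the element width, the
   pattern introduces an intermediate short-typed conversion (S3) and a
   short widening multiply (S5), while the original conversion S2 remains
   for any users of a_T outside the pattern.  */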
5800 void
5801 vect_pattern_recog (vec_info *vinfo)
5803 class loop *loop;
5804 basic_block *bbs;
5805 unsigned int nbbs;
5806 gimple_stmt_iterator si;
5807 unsigned int i, j;
5809 vect_determine_precisions (vinfo);
5811 DUMP_VECT_SCOPE ("vect_pattern_recog");
5813 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5815 loop = LOOP_VINFO_LOOP (loop_vinfo);
5816 bbs = LOOP_VINFO_BBS (loop_vinfo);
5817 nbbs = loop->num_nodes;
5819 /* Scan through the loop stmts, applying the pattern recognition
5820 functions starting at each stmt visited: */
5821 for (i = 0; i < nbbs; i++)
5823 basic_block bb = bbs[i];
5824 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5826 if (is_gimple_debug (gsi_stmt (si)))
5827 continue;
5828 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5829 /* Scan over all generic vect_recog_xxx_pattern functions. */
5830 for (j = 0; j < NUM_PATTERNS; j++)
5831 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5832 stmt_info);
5836 else
5838 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5839 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5840 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5841 !gsi_end_p (gsi); gsi_next (&gsi))
5843 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5844 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5845 continue;
5847 /* Scan over all generic vect_recog_xxx_pattern functions. */
5848 for (j = 0; j < NUM_PATTERNS; j++)
5849 vect_pattern_recog_1 (vinfo,
5850 &vect_vect_recog_func_ptrs[j], stmt_info);
5854 /* After this no more add_stmt calls are allowed. */
5855 vinfo->stmt_vec_info_ro = true;