gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimple-fold.h"
39 #include "gimplify-me.h"
40 #include "cfgloop.h"
41 #include "tree-vectorizer.h"
42 #include "dumpfile.h"
43 #include "builtins.h"
44 #include "internal-fn.h"
45 #include "case-cfn-macros.h"
46 #include "fold-const-call.h"
47 #include "attribs.h"
48 #include "cgraph.h"
49 #include "omp-simd-clone.h"
50 #include "predict.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "gimple-range.h"
56 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
57 in the first operand. Disentangling this is future work, the
58 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
61 /* Return true if we have a useful VR_RANGE range for VAR, storing it
62 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
64 static bool
65 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
67 value_range vr;
68 get_range_query (cfun)->range_of_expr (vr, var);
69 if (vr.undefined_p ())
70 vr.set_varying (TREE_TYPE (var));
71 *min_value = wi::to_wide (vr.min ());
72 *max_value = wi::to_wide (vr.max ());
73 value_range_kind vr_type = vr.kind ();
74 wide_int nonzero = get_nonzero_bits (var);
75 signop sgn = TYPE_SIGN (TREE_TYPE (var));
76 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
77 nonzero, sgn) == VR_RANGE)
79 if (dump_enabled_p ())
81 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
82 dump_printf (MSG_NOTE, " has range [");
83 dump_hex (MSG_NOTE, *min_value);
84 dump_printf (MSG_NOTE, ", ");
85 dump_hex (MSG_NOTE, *max_value);
86 dump_printf (MSG_NOTE, "]\n");
88 return true;
90 else
92 if (dump_enabled_p ())
94 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
95 dump_printf (MSG_NOTE, " has no range info\n");
97 return false;
101 /* Report that we've found an instance of pattern NAME in
102 statement STMT. */
104 static void
105 vect_pattern_detected (const char *name, gimple *stmt)
107 if (dump_enabled_p ())
108 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
111 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
112 return the pattern statement's stmt_vec_info. Set its vector type to
113 VECTYPE if it doesn't have one already. */
115 static stmt_vec_info
116 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
117 stmt_vec_info orig_stmt_info, tree vectype)
119 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
120 if (pattern_stmt_info == NULL)
121 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
122 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
124 pattern_stmt_info->pattern_stmt_p = true;
125 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
126 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
127 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
128 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
130 gcc_assert (!vectype
131 || (VECTOR_BOOLEAN_TYPE_P (vectype)
132 == vect_use_mask_type_p (orig_stmt_info)));
133 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
134 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
136 return pattern_stmt_info;
139 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
140 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
141 have one already. */
143 static void
144 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
145 stmt_vec_info orig_stmt_info, tree vectype)
147 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
148 STMT_VINFO_RELATED_STMT (orig_stmt_info)
149 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
152 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
153 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
154 be different from the vector type of the final pattern statement.
155 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
156 from which it was derived. */
158 static inline void
159 append_pattern_def_seq (vec_info *vinfo,
160 stmt_vec_info stmt_info, gimple *new_stmt,
161 tree vectype = NULL_TREE,
162 tree scalar_type_for_mask = NULL_TREE)
164 gcc_assert (!scalar_type_for_mask
165 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
166 if (vectype)
168 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
169 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
170 if (scalar_type_for_mask)
171 new_stmt_info->mask_precision
172 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
174 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
175 new_stmt);
178 /* The caller wants to perform new operations on vect_external variable
179 VAR, so that the result of the operations would also be vect_external.
180 Return the edge on which the operations can be performed, if one exists.
181 Return null if the operations should instead be treated as part of
182 the pattern that needs them. */
184 static edge
185 vect_get_external_def_edge (vec_info *vinfo, tree var)
187 edge e = NULL;
188 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
190 e = loop_preheader_edge (loop_vinfo->loop);
191 if (!SSA_NAME_IS_DEFAULT_DEF (var))
193 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
194 if (bb == NULL
195 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
196 e = NULL;
199 return e;
202 /* Return true if the target supports a vector version of CODE,
203 where CODE is known to map to a direct optab with the given SUBTYPE.
204 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
205 specifies the type of the scalar result.
207 If CODE allows the inputs and outputs to have different type
208 (such as for WIDEN_SUM_EXPR), it is the input mode rather
209 than the output mode that determines the appropriate target pattern.
210 Operand 0 of the target pattern then specifies the mode that the output
211 must have.
213 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
214 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
215 is nonnull. */
217 static bool
218 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
219 tree itype, tree *vecotype_out,
220 tree *vecitype_out = NULL,
221 enum optab_subtype subtype = optab_default)
223 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
224 if (!vecitype)
225 return false;
227 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
228 if (!vecotype)
229 return false;
231 optab optab = optab_for_tree_code (code, vecitype, subtype);
232 if (!optab)
233 return false;
235 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
236 if (icode == CODE_FOR_nothing
237 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
238 return false;
240 *vecotype_out = vecotype;
241 if (vecitype_out)
242 *vecitype_out = vecitype;
243 return true;
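/* For illustration, a hypothetical call (the argument names here are
   invented for this sketch) such as

     vect_supportable_direct_optab_p (vinfo, int_type, DOT_PROD_EXPR,
                                      schar_type, &vecotype)

   succeeds only if the target has a dot-product optab whose input mode
   is the vector mode of signed char and whose operand 0 (output) mode
   is the vector mode of int; *vecotype is then set to the int vector
   type.  */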
246 /* Round bit precision PRECISION up to a full element. */
248 static unsigned int
249 vect_element_precision (unsigned int precision)
251 precision = 1 << ceil_log2 (precision);
252 return MAX (precision, BITS_PER_UNIT);
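/* For illustration, on a target with 8-bit units this gives
   vect_element_precision (5) == 8, vect_element_precision (13) == 16
   and vect_element_precision (17) == 32.  */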
255 /* If OP is defined by a statement that's being considered for vectorization,
256 return information about that statement, otherwise return NULL. */
258 static stmt_vec_info
259 vect_get_internal_def (vec_info *vinfo, tree op)
261 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
262 if (def_stmt_info
263 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
264 return def_stmt_info;
265 return NULL;
268 /* Check whether NAME, an ssa-name used in STMT_VINFO,
269 is a result of a type promotion, such that:
270 DEF_STMT: NAME = NOP (name0)
271 If CHECK_SIGN is TRUE, check that either both types are signed or both are
272 unsigned. */
274 static bool
275 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
276 tree *orig_type, gimple **def_stmt, bool *promotion)
278 tree type = TREE_TYPE (name);
279 tree oprnd0;
280 enum vect_def_type dt;
282 stmt_vec_info def_stmt_info;
283 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
284 return false;
286 if (dt != vect_internal_def
287 && dt != vect_external_def && dt != vect_constant_def)
288 return false;
290 if (!*def_stmt)
291 return false;
293 if (!is_gimple_assign (*def_stmt))
294 return false;
296 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
297 return false;
299 oprnd0 = gimple_assign_rhs1 (*def_stmt);
301 *orig_type = TREE_TYPE (oprnd0);
302 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
303 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
304 return false;
306 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
307 *promotion = true;
308 else
309 *promotion = false;
311 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
312 return false;
314 return true;
317 /* Holds information about an input operand after some sign changes
318 and type promotions have been peeled away. */
319 class vect_unpromoted_value {
320 public:
321 vect_unpromoted_value ();
323 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
325 /* The value obtained after peeling away zero or more casts. */
326 tree op;
328 /* The type of OP. */
329 tree type;
331 /* The definition type of OP. */
332 vect_def_type dt;
334 /* If OP is the result of peeling at least one cast, and if the cast
335 of OP itself is a vectorizable statement, CASTER identifies that
336 statement, otherwise it is null. */
337 stmt_vec_info caster;
340 inline vect_unpromoted_value::vect_unpromoted_value ()
341 : op (NULL_TREE),
342 type (NULL_TREE),
343 dt (vect_uninitialized_def),
344 caster (NULL)
348 /* Set the operand to OP_IN, its definition type to DT_IN, and the
349 statement that casts it to CASTER_IN. */
351 inline void
352 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
353 stmt_vec_info caster_in)
355 op = op_in;
356 type = TREE_TYPE (op);
357 dt = dt_in;
358 caster = caster_in;
361 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
362 to reach some vectorizable inner operand OP', continuing as long as it
363 is possible to convert OP' back to OP using a possible sign change
364 followed by a possible promotion P. Return this OP', or null if OP is
365 not a vectorizable SSA name. If there is a promotion P, describe its
366 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
367 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
368 have more than one user.
370 A successful return means that it is possible to go from OP' to OP
371 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
372 whereas the cast from UNPROM to OP might be a promotion, a sign
373 change, or a nop.
375 E.g. say we have:
377 signed short *ptr = ...;
378 signed short C = *ptr;
379 unsigned short B = (unsigned short) C; // sign change
380 signed int A = (signed int) B; // unsigned promotion
381 ...possible other uses of A...
382 unsigned int OP = (unsigned int) A; // sign change
384 In this case it's possible to go directly from C to OP using:
386 OP = (unsigned int) (unsigned short) C;
387 +------------+ +--------------+
388 promotion sign change
390 so OP' would be C. The input to the promotion is B, so UNPROM
391 would describe B. */
393 static tree
394 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
395 vect_unpromoted_value *unprom,
396 bool *single_use_p = NULL)
398 tree res = NULL_TREE;
399 tree op_type = TREE_TYPE (op);
400 unsigned int orig_precision = TYPE_PRECISION (op_type);
401 unsigned int min_precision = orig_precision;
402 stmt_vec_info caster = NULL;
403 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
405 /* See whether OP is simple enough to vectorize. */
406 stmt_vec_info def_stmt_info;
407 gimple *def_stmt;
408 vect_def_type dt;
409 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
410 break;
412 /* If OP is the input of a demotion, skip over it to see whether
413 OP is itself the result of a promotion. If so, the combined
414 effect of the promotion and the demotion might fit the required
415 pattern, otherwise neither operation fits.
417 This copes with cases such as the result of an arithmetic
418 operation being truncated before being stored, and where that
419 arithmetic operation has been recognized as an over-widened one. */
420 if (TYPE_PRECISION (op_type) <= min_precision)
422 /* Use OP as the UNPROM described above if we haven't yet
423 found a promotion, or if using the new input preserves the
424 sign of the previous promotion. */
425 if (!res
426 || TYPE_PRECISION (unprom->type) == orig_precision
427 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
429 unprom->set_op (op, dt, caster);
430 min_precision = TYPE_PRECISION (op_type);
432 /* Stop if we've already seen a promotion and if this
433 conversion does more than change the sign. */
434 else if (TYPE_PRECISION (op_type)
435 != TYPE_PRECISION (unprom->type))
436 break;
438 /* The sequence now extends to OP. */
439 res = op;
442 /* See whether OP is defined by a cast. Record it as CASTER if
443 the cast is potentially vectorizable. */
444 if (!def_stmt)
445 break;
446 caster = def_stmt_info;
448 /* Ignore pattern statements, since we don't link uses for them. */
449 if (caster
450 && single_use_p
451 && !STMT_VINFO_RELATED_STMT (caster)
452 && !has_single_use (res))
453 *single_use_p = false;
455 gassign *assign = dyn_cast <gassign *> (def_stmt);
456 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
457 break;
459 /* Continue with the input to the cast. */
460 op = gimple_assign_rhs1 (def_stmt);
461 op_type = TREE_TYPE (op);
463 return res;
466 /* OP is an integer operand to an operation that returns TYPE, and we
467 want to treat the operation as a widening one. So far we can treat
468 it as widening from *COMMON_TYPE.
470 Return true if OP is suitable for such a widening operation,
471 either widening from *COMMON_TYPE or from some supertype of it.
472 Update *COMMON_TYPE to the supertype in the latter case.
474 SHIFT_P is true if OP is a shift amount. */
476 static bool
477 vect_joust_widened_integer (tree type, bool shift_p, tree op,
478 tree *common_type)
480 /* Calculate the minimum precision required by OP, without changing
481 the sign of either operand. */
482 unsigned int precision;
483 if (shift_p)
485 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
486 return false;
487 precision = TREE_INT_CST_LOW (op);
489 else
491 precision = wi::min_precision (wi::to_widest (op),
492 TYPE_SIGN (*common_type));
493 if (precision * 2 > TYPE_PRECISION (type))
494 return false;
497 /* If OP requires a wider type, switch to that type. The checks
498 above ensure that this is still narrower than the result. */
499 precision = vect_element_precision (precision);
500 if (TYPE_PRECISION (*common_type) < precision)
501 *common_type = build_nonstandard_integer_type
502 (precision, TYPE_UNSIGNED (*common_type));
503 return true;
506 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
507 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
509 static bool
510 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
512 if (types_compatible_p (*common_type, new_type))
513 return true;
515 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
516 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
517 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
518 return true;
520 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
521 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
522 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
524 *common_type = new_type;
525 return true;
528 /* We have mismatched signs, with the signed type being
529 no wider than the unsigned type. In this case we need
530 a wider signed type. */
531 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
532 TYPE_PRECISION (new_type));
533 precision *= 2;
535 if (precision * 2 > TYPE_PRECISION (type))
536 return false;
538 *common_type = build_nonstandard_integer_type (precision, false);
539 return true;
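/* For illustration (a hypothetical case): jousting unsigned short
   against signed short requires a 32-bit signed common type, so the
   joust succeeds when TYPE is a 64-bit integer but fails when TYPE is
   only 32 bits wide, because the common type would no longer be
   sufficiently narrower than the result.  */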
542 /* Check whether STMT_INFO can be viewed as a tree of integer operations
543 in which each node either performs CODE or WIDENED_CODE, and where
544 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
545 specifies the maximum number of leaf operands. SHIFT_P says whether
546 CODE and WIDENED_CODE are some sort of shift.
548 If STMT_INFO is such a tree, return the number of leaf operands
549 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
550 to a type that (a) is narrower than the result of STMT_INFO and
551 (b) can hold all leaf operand values.
553 If SUBTYPE then allow that the signs of the operands
554 may differ but not their precision. SUBTYPE is updated to reflect
555 this.
557 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
558 exists. */
560 static unsigned int
561 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
562 tree_code widened_code, bool shift_p,
563 unsigned int max_nops,
564 vect_unpromoted_value *unprom, tree *common_type,
565 enum optab_subtype *subtype = NULL)
567 /* Check for an integer operation with the right code. */
568 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
569 if (!assign)
570 return 0;
572 tree_code rhs_code = gimple_assign_rhs_code (assign);
573 if (rhs_code != code && rhs_code != widened_code)
574 return 0;
576 tree type = TREE_TYPE (gimple_assign_lhs (assign));
577 if (!INTEGRAL_TYPE_P (type))
578 return 0;
580 /* Assume that both operands will be leaf operands. */
581 max_nops -= 2;
583 /* Check the operands. */
584 unsigned int next_op = 0;
585 for (unsigned int i = 0; i < 2; ++i)
587 vect_unpromoted_value *this_unprom = &unprom[next_op];
588 unsigned int nops = 1;
589 tree op = gimple_op (assign, i + 1);
590 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
592 /* We already have a common type from earlier operands.
593 Update it to account for OP. */
594 this_unprom->set_op (op, vect_constant_def);
595 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
596 return 0;
598 else
600 /* Only allow shifts by constants. */
601 if (shift_p && i == 1)
602 return 0;
604 if (rhs_code != code)
606 /* If rhs_code is widened_code, don't look through further
607 possible promotions, there is a promotion already embedded
608 in the WIDEN_*_EXPR. */
609 if (TREE_CODE (op) != SSA_NAME
610 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
611 return 0;
613 stmt_vec_info def_stmt_info;
614 gimple *def_stmt;
615 vect_def_type dt;
616 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
617 &def_stmt))
618 return 0;
619 this_unprom->set_op (op, dt, NULL);
621 else if (!vect_look_through_possible_promotion (vinfo, op,
622 this_unprom))
623 return 0;
625 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
627 /* The operand isn't widened. If STMT_INFO has the code
628 for an unwidened operation, recursively check whether
629 this operand is a node of the tree. */
630 if (rhs_code != code
631 || max_nops == 0
632 || this_unprom->dt != vect_internal_def)
633 return 0;
635 /* Give back the leaf slot allocated above now that we're
636 not treating this as a leaf operand. */
637 max_nops += 1;
639 /* Recursively process the definition of the operand. */
640 stmt_vec_info def_stmt_info
641 = vinfo->lookup_def (this_unprom->op);
642 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
643 widened_code, shift_p, max_nops,
644 this_unprom, common_type,
645 subtype);
646 if (nops == 0)
647 return 0;
649 max_nops -= nops;
651 else
653 /* Make sure that the operand is narrower than the result. */
654 if (TYPE_PRECISION (this_unprom->type) * 2
655 > TYPE_PRECISION (type))
656 return 0;
658 /* Update COMMON_TYPE for the new operand. */
659 if (i == 0)
660 *common_type = this_unprom->type;
661 else if (!vect_joust_widened_type (type, this_unprom->type,
662 common_type))
664 if (subtype)
666 /* See if we can sign extend the smaller type. */
667 if (TYPE_PRECISION (this_unprom->type)
668 > TYPE_PRECISION (*common_type))
669 *common_type = this_unprom->type;
670 *subtype = optab_vector_mixed_sign;
672 else
673 return 0;
677 next_op += nops;
679 return next_op;
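/* For illustration, given a statement such as

     int prod = (int) a * (int) b;   // a and b of type signed short

   a call with CODE == MULT_EXPR and MAX_NOPS == 2 returns 2, describes
   a and b in UNPROM[0] and UNPROM[1], and sets *COMMON_TYPE to signed
   short.  (A hypothetical example, not taken from the surrounding
   comments.)  */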
682 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
683 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
685 static tree
686 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
688 return make_temp_ssa_name (type, stmt, "patt");
691 /* STMT2_INFO describes a type conversion that could be split into STMT1
692 followed by a version of STMT2_INFO that takes NEW_RHS as its first
693 input. Try to do this using pattern statements, returning true on
694 success. */
696 static bool
697 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
698 gimple *stmt1, tree vectype)
700 if (is_pattern_stmt_p (stmt2_info))
702 /* STMT2_INFO is part of a pattern. Get the statement to which
703 the pattern is attached. */
704 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
705 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE, vect_location,
709 "Splitting pattern statement: %G", stmt2_info->stmt);
711 /* Since STMT2_INFO is a pattern statement, we can change it
712 in-situ without worrying about changing the code for the
713 containing block. */
714 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
716 if (dump_enabled_p ())
718 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
719 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
720 stmt2_info->stmt);
723 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
724 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
725 /* STMT2_INFO is the actual pattern statement. Add STMT1
726 to the end of the definition sequence. */
727 gimple_seq_add_stmt_without_update (def_seq, stmt1);
728 else
730 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
731 before it. */
732 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
733 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
735 return true;
737 else
739 /* STMT2_INFO doesn't yet have a pattern. Try to create a
740 two-statement pattern now. */
741 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
742 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
743 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
744 if (!lhs_vectype)
745 return false;
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_NOTE, vect_location,
749 "Splitting statement: %G", stmt2_info->stmt);
751 /* Add STMT1 as a singleton pattern definition sequence. */
752 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
753 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
754 gimple_seq_add_stmt_without_update (def_seq, stmt1);
756 /* Build the second of the two pattern statements. */
757 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
758 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
759 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
761 if (dump_enabled_p ())
763 dump_printf_loc (MSG_NOTE, vect_location,
764 "into pattern statements: %G", stmt1);
765 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
766 (gimple *) new_stmt2);
769 return true;
773 /* Convert UNPROM to TYPE and return the result, adding new statements
774 to STMT_INFO's pattern definition statements if no better way is
775 available. VECTYPE is the vector form of TYPE.
777 If SUBTYPE then convert the type based on the subtype. */
779 static tree
780 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
781 vect_unpromoted_value *unprom, tree vectype,
782 enum optab_subtype subtype = optab_default)
784 /* Update the type if the signs differ. */
785 if (subtype == optab_vector_mixed_sign)
787 gcc_assert (!TYPE_UNSIGNED (type));
788 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
790 type = unsigned_type_for (type);
791 vectype = unsigned_type_for (vectype);
795 /* Check for a no-op conversion. */
796 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
797 return unprom->op;
799 /* Allow the caller to create constant vect_unpromoted_values. */
800 if (TREE_CODE (unprom->op) == INTEGER_CST)
801 return wide_int_to_tree (type, wi::to_widest (unprom->op));
803 tree input = unprom->op;
804 if (unprom->caster)
806 tree lhs = gimple_get_lhs (unprom->caster->stmt);
807 tree lhs_type = TREE_TYPE (lhs);
809 /* If the result of the existing cast is the right width, use it
810 instead of the source of the cast. */
811 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
812 input = lhs;
813 /* If the precision we want is between the source and result
814 precisions of the existing cast, try splitting the cast into
815 two and tapping into a mid-way point. */
816 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
817 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
819 /* In order to preserve the semantics of the original cast,
820 give the mid-way point the same signedness as the input value.
822 It would be possible to use a signed type here instead if
823 TYPE is signed and UNPROM->TYPE is unsigned, but that would
824 make the sign of the midtype sensitive to the order in
825 which we process the statements, since the signedness of
826 TYPE is the signedness required by just one of possibly
827 many users. Also, unsigned promotions are usually as cheap
828 as or cheaper than signed ones, so it's better to keep an
829 unsigned promotion. */
830 tree midtype = build_nonstandard_integer_type
831 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
832 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
833 if (vec_midtype)
835 input = vect_recog_temp_ssa_var (midtype, NULL);
836 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
837 unprom->op);
838 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
839 vec_midtype))
840 append_pattern_def_seq (vinfo, stmt_info,
841 new_stmt, vec_midtype);
845 /* See if we can reuse an existing result. */
846 if (types_compatible_p (type, TREE_TYPE (input)))
847 return input;
850 /* We need a new conversion statement. */
851 tree new_op = vect_recog_temp_ssa_var (type, NULL);
852 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
854 /* If OP is an external value, see if we can insert the new statement
855 on an incoming edge. */
856 if (input == unprom->op && unprom->dt == vect_external_def)
857 if (edge e = vect_get_external_def_edge (vinfo, input))
859 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
860 gcc_assert (!new_bb);
861 return new_op;
864 /* As a (common) last resort, add the statement to the pattern itself. */
865 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
866 return new_op;
869 /* Invoke vect_convert_input for N elements of UNPROM and store the
870 result in the corresponding elements of RESULT.
872 If SUBTYPE then convert the type based on the subtype. */
874 static void
875 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
876 tree *result, tree type, vect_unpromoted_value *unprom,
877 tree vectype, enum optab_subtype subtype = optab_default)
879 for (unsigned int i = 0; i < n; ++i)
881 unsigned int j;
882 for (j = 0; j < i; ++j)
883 if (unprom[j].op == unprom[i].op)
884 break;
886 if (j < i)
887 result[i] = result[j];
888 else
889 result[i] = vect_convert_input (vinfo, stmt_info,
890 type, &unprom[i], vectype, subtype);
894 /* The caller has created a (possibly empty) sequence of pattern definition
895 statements followed by a single statement PATTERN_STMT. Cast the result
896 of this final statement to TYPE. If a new statement is needed, add
897 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
898 and return the new statement, otherwise return PATTERN_STMT as-is.
899 VECITYPE is the vector form of PATTERN_STMT's result type. */
901 static gimple *
902 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
903 gimple *pattern_stmt, tree vecitype)
905 tree lhs = gimple_get_lhs (pattern_stmt);
906 if (!types_compatible_p (type, TREE_TYPE (lhs)))
908 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
909 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
910 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
912 return pattern_stmt;
915 /* Return true if STMT_VINFO describes a reduction for which reassociation
916 is allowed. If STMT_INFO is part of a group, assume that it's part of
917 a reduction chain and optimistically assume that all statements
918 except the last allow reassociation.
919 Also require it to have code CODE and to be a reduction
920 in the outermost loop. When returning true, store the operands in
921 *OP0_OUT and *OP1_OUT. */
923 static bool
924 vect_reassociating_reduction_p (vec_info *vinfo,
925 stmt_vec_info stmt_info, tree_code code,
926 tree *op0_out, tree *op1_out)
928 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
929 if (!loop_info)
930 return false;
932 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
933 if (!assign || gimple_assign_rhs_code (assign) != code)
934 return false;
936 /* We don't allow changing the order of the computation in the inner-loop
937 when doing outer-loop vectorization. */
938 class loop *loop = LOOP_VINFO_LOOP (loop_info);
939 if (loop && nested_in_vect_loop_p (loop, stmt_info))
940 return false;
942 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
944 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
945 code))
946 return false;
948 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
949 return false;
951 *op0_out = gimple_assign_rhs1 (assign);
952 *op1_out = gimple_assign_rhs2 (assign);
953 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
954 std::swap (*op0_out, *op1_out);
955 return true;
958 /* match.pd function to match
959 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
960 with conditions:
961 1) @1, @2, c, d, a, b are all integral type.
962 2) There's single_use for both @1 and @2.
963 3) a, c have same precision.
964 4) c and @1 have different precision.
965 5) c, d are the same type or they can differ in sign when the conversion is a
966 truncation.
968 record a and c and d and @3. */
970 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
972 /* Function vect_recog_cond_expr_convert
974 Try to find the following pattern:
976 TYPE_AB A,B;
977 TYPE_CD C,D;
978 TYPE_E E;
979 TYPE_E op_true = (TYPE_E) A;
980 TYPE_E op_false = (TYPE_E) B;
982 E = C cmp D ? op_true : op_false;
984 where
985 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
986 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
987 single_use of op_true and op_false.
988 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
990 Input:
992 * STMT_VINFO: The stmt from which the pattern search begins.
993 here it starts with E = C cmp D ? op_true : op_false;
995 Output:
997 TYPE1 E' = C cmp D ? A : B;
998 TYPE3 E = (TYPE3) E';
1000 There may be an extra nop_convert for A or B to handle different signedness.
1002 * TYPE_OUT: The vector type of the output of this pattern.
1004 * Return value: A new stmt that will be used to replace the sequence of
1005 stmts that constitute the pattern. In this case it will be:
1006 E = (TYPE3)E';
1007 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
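/* For illustration (a hypothetical example): with long a, b, c, d and
   int e, the statement

     e = c < d ? (int) a : (int) b;

   is rewritten as

     long tmp = c < d ? a : b;
     e = (int) tmp;

   so that the VEC_COND_EXPR operates on the same element width as the
   comparison.  */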
1009 static gimple *
1010 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1011 stmt_vec_info stmt_vinfo, tree *type_out)
1013 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1014 tree lhs, match[4], temp, type, new_lhs, op2;
1015 gimple *cond_stmt;
1016 gimple *pattern_stmt;
1018 if (!last_stmt)
1019 return NULL;
1021 lhs = gimple_assign_lhs (last_stmt);
1023 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1024 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1025 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1026 return NULL;
1028 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1030 op2 = match[2];
1031 type = TREE_TYPE (match[1]);
1032 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1034 op2 = vect_recog_temp_ssa_var (type, NULL);
1035 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1036 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1037 get_vectype_for_scalar_type (vinfo, type));
1040 temp = vect_recog_temp_ssa_var (type, NULL);
1041 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1042 match[1], op2));
1043 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1044 get_vectype_for_scalar_type (vinfo, type));
1045 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1046 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1047 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "created pattern stmt: %G", pattern_stmt);
1052 return pattern_stmt;
1055 /* Function vect_recog_dot_prod_pattern
1057 Try to find the following pattern:
1059 type1a x_t
1060 type1b y_t;
1061 TYPE1 prod;
1062 TYPE2 sum = init;
1063 loop:
1064 sum_0 = phi <init, sum_1>
1065 S1 x_t = ...
1066 S2 y_t = ...
1067 S3 x_T = (TYPE1) x_t;
1068 S4 y_T = (TYPE1) y_t;
1069 S5 prod = x_T * y_T;
1070 [S6 prod = (TYPE2) prod; #optional]
1071 S7 sum_1 = prod + sum_0;
1073 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1074 the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
1075 but 'type1a' and 'type1b' may themselves differ in sign.
1077 Input:
1079 * STMT_VINFO: The stmt from which the pattern search begins. In the
1080 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1081 will be detected.
1083 Output:
1085 * TYPE_OUT: The type of the output of this pattern.
1087 * Return value: A new stmt that will be used to replace the sequence of
1088 stmts that constitute the pattern. In this case it will be:
1089 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1091 Note: The dot-prod idiom is a widening reduction pattern that is
1092 vectorized without preserving all the intermediate results. It
1093 produces only N/2 (widened) results (by summing up pairs of
1094 intermediate results) rather than all N results. Therefore, we
1095 cannot allow this pattern when we want to get all the results and in
1096 the correct order (as is the case when this computation is in an
1097 inner-loop nested in an outer-loop that is being vectorized). */
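/* For illustration, a typical source loop that this pattern matches is

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];

   where the multiplication happens on values promoted from short to
   int; the reduction is then performed with DOT_PROD_EXPR on the
   narrow inputs.  (A hypothetical example for exposition only.)  */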
1099 static gimple *
1100 vect_recog_dot_prod_pattern (vec_info *vinfo,
1101 stmt_vec_info stmt_vinfo, tree *type_out)
1103 tree oprnd0, oprnd1;
1104 gimple *last_stmt = stmt_vinfo->stmt;
1105 tree type, half_type;
1106 gimple *pattern_stmt;
1107 tree var;
1109 /* Look for the following pattern
1110 DX = (TYPE1) X;
1111 DY = (TYPE1) Y;
1112 DPROD = DX * DY;
1113 DDPROD = (TYPE2) DPROD;
1114 sum_1 = DDPROD + sum_0;
1115 In which
1116 - DX is double the size of X
1117 - DY is double the size of Y
1118 - DX, DY, DPROD all have the same type but the sign
1119 between X, Y and DPROD can differ.
1120 - sum is the same size as DPROD or bigger
1121 - sum has been recognized as a reduction variable.
1123 This is equivalent to:
1124 DPROD = X w* Y; #widen mult
1125 sum_1 = DPROD w+ sum_0; #widen summation
1127 DPROD = X w* Y; #widen mult
1128 sum_1 = DPROD + sum_0; #summation
1131 /* Starting from LAST_STMT, follow the defs of its uses in search
1132 of the above pattern. */
1134 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1135 &oprnd0, &oprnd1))
1136 return NULL;
1138 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1140 vect_unpromoted_value unprom_mult;
1141 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1143 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1144 we know that oprnd1 is the reduction variable (defined by a loop-header
1145 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1146 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1147 if (!oprnd0)
1148 return NULL;
1150 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1151 if (!mult_vinfo)
1152 return NULL;
1154 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1155 inside the loop (in case we are analyzing an outer-loop). */
1156 vect_unpromoted_value unprom0[2];
1157 enum optab_subtype subtype = optab_vector;
1158 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1159 false, 2, unprom0, &half_type, &subtype))
1160 return NULL;
1162 /* If there are two widening operations, make sure they agree on the sign
1163 of the extension. The result of an optab_vector_mixed_sign operation
1164 is signed; otherwise, the result has the same sign as the operands. */
1165 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1166 && (subtype == optab_vector_mixed_sign
1167 ? TYPE_UNSIGNED (unprom_mult.type)
1168 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1169 return NULL;
1171 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1173 /* If the inputs have mixed signs, canonicalize on using the signed
1174 input type for analysis. This also helps when emulating mixed-sign
1175 operations using signed operations. */
1176 if (subtype == optab_vector_mixed_sign)
1177 half_type = signed_type_for (half_type);
1179 tree half_vectype;
1180 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1181 type_out, &half_vectype, subtype))
1183 /* We can emulate a mixed-sign dot-product using a sequence of
1184 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1185 if (subtype != optab_vector_mixed_sign
1186 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1187 DOT_PROD_EXPR, half_type,
1188 type_out, &half_vectype,
1189 optab_vector))
1190 return NULL;
1192 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1193 *type_out);
1196 /* Get the inputs in the appropriate types. */
1197 tree mult_oprnd[2];
1198 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1199 unprom0, half_vectype, subtype);
1201 var = vect_recog_temp_ssa_var (type, NULL);
1202 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1203 mult_oprnd[0], mult_oprnd[1], oprnd1);
1205 return pattern_stmt;
1209 /* Function vect_recog_sad_pattern
1211 Try to find the following Sum of Absolute Difference (SAD) pattern:
1213 type x_t, y_t;
1214 signed TYPE1 diff, abs_diff;
1215 TYPE2 sum = init;
1216 loop:
1217 sum_0 = phi <init, sum_1>
1218 S1 x_t = ...
1219 S2 y_t = ...
1220 S3 x_T = (TYPE1) x_t;
1221 S4 y_T = (TYPE1) y_t;
1222 S5 diff = x_T - y_T;
1223 S6 abs_diff = ABS_EXPR <diff>;
1224 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1225 S8 sum_1 = abs_diff + sum_0;
1227 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1228 same size as 'TYPE1' or bigger. This is a special case of a reduction
1229 computation.
1231 Input:
1233 * STMT_VINFO: The stmt from which the pattern search begins. In the
1234 example, when this function is called with S8, the pattern
1235 {S3,S4,S5,S6,S7,S8} will be detected.
1237 Output:
1239 * TYPE_OUT: The type of the output of this pattern.
1241 * Return value: A new stmt that will be used to replace the sequence of
1242 stmts that constitute the pattern. In this case it will be:
1243 SAD_EXPR <x_t, y_t, sum_0>
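/* For illustration, a typical source loop that this pattern matches is

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs (x[i] - y[i]);

   where the subtraction happens on values promoted to int; the
   reduction is then performed with SAD_EXPR on the narrow inputs.
   (A hypothetical example for exposition only.)  */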
1246 static gimple *
1247 vect_recog_sad_pattern (vec_info *vinfo,
1248 stmt_vec_info stmt_vinfo, tree *type_out)
1250 gimple *last_stmt = stmt_vinfo->stmt;
1251 tree half_type;
1253 /* Look for the following pattern
1254 DX = (TYPE1) X;
1255 DY = (TYPE1) Y;
1256 DDIFF = DX - DY;
1257 DAD = ABS_EXPR <DDIFF>;
1258 [DAD = (TYPE2) DAD;   #optional]
1259 sum_1 = DAD + sum_0;
1260 In which
1261 - DX is at least double the size of X
1262 - DY is at least double the size of Y
1263 - DX, DY, DDIFF, DAD all have the same type
1264 - sum is the same size as DAD or bigger
1265 - sum has been recognized as a reduction variable.
1267 This is equivalent to:
1268 DDIFF = X w- Y; #widen sub
1269 DAD = ABS_EXPR <DDIFF>;
1270 sum_1 = DAD w+ sum_0; #widen summation
1272 DDIFF = X w- Y; #widen sub
1273 DAD = ABS_EXPR <DDIFF>;
1274 sum_1 = DAD + sum_0; #summation
1277 /* Starting from LAST_STMT, follow the defs of its uses in search
1278 of the above pattern. */
1280 tree plus_oprnd0, plus_oprnd1;
1281 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1282 &plus_oprnd0, &plus_oprnd1))
1283 return NULL;
1285 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1287 /* Any non-truncating sequence of conversions is OK here, since
1288 with a successful match, the result of the ABS(U) is known to fit
1289 within the nonnegative range of the result type. (It cannot be the
1290 negative of the minimum signed value due to the range of the widening
1291 MINUS_EXPR.) */
1292 vect_unpromoted_value unprom_abs;
1293 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1294 &unprom_abs);
1296 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1297 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1298 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1299 Then check that plus_oprnd0 is defined by an abs_expr. */
1301 if (!plus_oprnd0)
1302 return NULL;
1304 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1305 if (!abs_stmt_vinfo)
1306 return NULL;
1308 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1309 inside the loop (in case we are analyzing an outer-loop). */
1310 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1311 if (!abs_stmt
1312 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1313 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1314 return NULL;
1316 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1317 tree abs_type = TREE_TYPE (abs_oprnd);
1318 if (TYPE_UNSIGNED (abs_type))
1319 return NULL;
1321 /* Peel off conversions from the ABS input. This can involve sign
1322 changes (e.g. from an unsigned subtraction to a signed ABS input)
1323 or signed promotion, but it can't include unsigned promotion.
1324 (Note that ABS of an unsigned promotion should have been folded
1325 away before now anyway.) */
1326 vect_unpromoted_value unprom_diff;
1327 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1328 &unprom_diff);
1329 if (!abs_oprnd)
1330 return NULL;
1331 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1332 && TYPE_UNSIGNED (unprom_diff.type))
1333 return NULL;
1335 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1336 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1337 if (!diff_stmt_vinfo)
1338 return NULL;
1340 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1341 inside the loop (in case we are analyzing an outer-loop). */
1342 vect_unpromoted_value unprom[2];
1343 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1344 false, 2, unprom, &half_type))
1345 return NULL;
1347 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1349 tree half_vectype;
1350 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1351 type_out, &half_vectype))
1352 return NULL;
1354 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1355 tree sad_oprnd[2];
1356 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1357 unprom, half_vectype);
1359 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1360 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1361 sad_oprnd[1], plus_oprnd1);
1363 return pattern_stmt;
1366 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1367 so that it can be treated as though it had the form:
1369 A_TYPE a;
1370 B_TYPE b;
1371 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1372 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1373 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1374 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1375 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1377 Try to replace the pattern with:
1379 A_TYPE a;
1380 B_TYPE b;
1381 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1382 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1383 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1384 | RES_TYPE res = (RES_TYPE) ext; // possible no-op
1386 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1388 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1389 name of the pattern being matched, for dump purposes. */
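/* For illustration (a hypothetical example): with short a, b and
   WIDE_CODE == WIDEN_PLUS_EXPR, the computation

     int res = (int) a + (int) b;

   becomes

     int ext = a w+ b;   // WIDEN_PLUS_EXPR on the short inputs
     res = ext;          // no-op conversion

   on targets that provide the corresponding widening optab.  */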
1391 static gimple *
1392 vect_recog_widen_op_pattern (vec_info *vinfo,
1393 stmt_vec_info last_stmt_info, tree *type_out,
1394 tree_code orig_code, tree_code wide_code,
1395 bool shift_p, const char *name)
1397 gimple *last_stmt = last_stmt_info->stmt;
1399 vect_unpromoted_value unprom[2];
1400 tree half_type;
1401 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1402 shift_p, 2, unprom, &half_type))
1403 return NULL;
1405 /* Pattern detected. */
1406 vect_pattern_detected (name, last_stmt);
1408 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1409 tree itype = type;
1410 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1411 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1412 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1413 TYPE_UNSIGNED (half_type));
1415 /* Check target support */
1416 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1417 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1418 tree ctype = itype;
1419 tree vecctype = vecitype;
1420 if (orig_code == MINUS_EXPR
1421 && TYPE_UNSIGNED (itype)
1422 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1424 /* Subtraction is special, even if half_type is unsigned and no matter
1425 whether type is signed or unsigned, if type is wider than itype,
1426 we need to sign-extend from the widening operation result to the
1427 result type.
1428 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1429 itype unsigned short and type either int or unsigned int.
1430 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1431 (unsigned short) 0xffff, but for type int we want the result -1
1432 and for type unsigned int 0xffffffff rather than 0xffff. */
1433 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1434 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1437 enum tree_code dummy_code;
1438 int dummy_int;
1439 auto_vec<tree> dummy_vec;
1440 if (!vectype
1441 || !vecitype
1442 || !vecctype
1443 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1444 vecitype, vectype,
1445 &dummy_code, &dummy_code,
1446 &dummy_int, &dummy_vec))
1447 return NULL;
1449 *type_out = get_vectype_for_scalar_type (vinfo, type);
1450 if (!*type_out)
1451 return NULL;
1453 tree oprnd[2];
1454 vect_convert_inputs (vinfo, last_stmt_info,
1455 2, oprnd, half_type, unprom, vectype);
1457 tree var = vect_recog_temp_ssa_var (itype, NULL);
1458 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1459 oprnd[0], oprnd[1]);
1461 if (vecctype != vecitype)
1462 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1463 pattern_stmt, vecitype);
1465 return vect_convert_output (vinfo, last_stmt_info,
1466 type, pattern_stmt, vecctype);
1469 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1470 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1472 static gimple *
1473 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1474 tree *type_out)
1476 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1477 MULT_EXPR, WIDEN_MULT_EXPR, false,
1478 "vect_recog_widen_mult_pattern");
1481 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1482 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1484 static gimple *
1485 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1486 tree *type_out)
1488 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1489 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1490 "vect_recog_widen_plus_pattern");
1493 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1494 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1495 static gimple *
1496 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1497 tree *type_out)
1499 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1500 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1501 "vect_recog_widen_minus_pattern");
1504 /* Function vect_recog_popcount_pattern
1506 Try to find the following pattern:
1508 UTYPE1 A;
1509 TYPE1 B;
1510 UTYPE2 temp_in;
1511 TYPE3 temp_out;
1512 temp_in = (UTYPE2)A;
1514 temp_out = __builtin_popcount{,l,ll} (temp_in);
1515 B = (TYPE1) temp_out;
1517 UTYPE2 may or may not be equal to TYPE3;
1518 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount, but
1519 UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
1521 Input:
1523 * STMT_VINFO: The stmt from which the pattern search begins.
1524 here it starts with B = (TYPE1) temp_out;
1526 Output:
1528 * TYPE_OUT: The vector type of the output of this pattern.
1530 * Return value: A new stmt that will be used to replace the sequence of
1531 stmts that constitute the pattern. In this case it will be:
1532 B = .POPCOUNT (A);
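/* For illustration (a hypothetical example, assuming an LP64 target
   with a vector popcount optab):

     unsigned long a;
     long b = __builtin_popcountll (a);

   is replaced by b = .POPCOUNT (a), which operates on same-precision
   operands.  */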
1535 static gimple *
1536 vect_recog_popcount_pattern (vec_info *vinfo,
1537 stmt_vec_info stmt_vinfo, tree *type_out)
1539 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1540 gimple *popcount_stmt, *pattern_stmt;
1541 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1542 auto_vec<tree> vargs;
1544 /* Find B = (TYPE1) temp_out. */
1545 if (!last_stmt)
1546 return NULL;
1547 tree_code code = gimple_assign_rhs_code (last_stmt);
1548 if (!CONVERT_EXPR_CODE_P (code))
1549 return NULL;
1551 lhs_oprnd = gimple_assign_lhs (last_stmt);
1552 lhs_type = TREE_TYPE (lhs_oprnd);
1553 if (!INTEGRAL_TYPE_P (lhs_type))
1554 return NULL;
1556 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1557 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1558 || !has_single_use (rhs_oprnd))
1559 return NULL;
1560 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1562 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1563 if (!is_gimple_call (popcount_stmt))
1564 return NULL;
1565 switch (gimple_call_combined_fn (popcount_stmt))
1567 CASE_CFN_POPCOUNT:
1568 break;
1569 default:
1570 return NULL;
1573 if (gimple_call_num_args (popcount_stmt) != 1)
1574 return NULL;
1576 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1577 vect_unpromoted_value unprom_diff;
1578 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1579 &unprom_diff);
1581 if (!rhs_origin)
1582 return NULL;
1584 /* Input and output of .POPCOUNT should be same-precision integer.
1585 Also A should be unsigned or same precision as temp_in,
1586 otherwise there would be sign_extend from A to temp_in. */
1587 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1588 || (!TYPE_UNSIGNED (unprom_diff.type)
1589 && (TYPE_PRECISION (unprom_diff.type)
1590 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1591 return NULL;
1592 vargs.safe_push (unprom_diff.op);
1594 vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
1595 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1596 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1597 if (!vec_type
1598 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1599 OPTIMIZE_FOR_SPEED))
1600 return NULL;
1602 /* Create B = .POPCOUNT (A). */
1603 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1604 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1605 gimple_call_set_lhs (pattern_stmt, new_var);
1606 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1607 *type_out = vec_type;
1609 if (dump_enabled_p ())
1610 dump_printf_loc (MSG_NOTE, vect_location,
1611 "created pattern stmt: %G", pattern_stmt);
1612 return pattern_stmt;
1615 /* Function vect_recog_pow_pattern
1617 Try to find the following pattern:
1619 x = POW (y, N);
1621 with POW being one of pow, powf, powi, powif and N being
1622 either 2 or 0.5.
1624 Input:
1626 * STMT_VINFO: The stmt from which the pattern search begins.
1628 Output:
1630 * TYPE_OUT: The type of the output of this pattern.
1632 * Return value: A new stmt that will be used to replace the sequence of
1633 stmts that constitute the pattern. In this case it will be:
1634 x = x * x
1636 x = sqrt (x)
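/* For illustration:

     y = pow (x, 2.0);   is rewritten as   y = x * x;
     y = pow (x, 0.5);   is rewritten as   y = sqrt (x);

   the latter only when the target supports a vector square root
   (IFN_SQRT).  */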
1639 static gimple *
1640 vect_recog_pow_pattern (vec_info *vinfo,
1641 stmt_vec_info stmt_vinfo, tree *type_out)
1643 gimple *last_stmt = stmt_vinfo->stmt;
1644 tree base, exp;
1645 gimple *stmt;
1646 tree var;
1648 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1649 return NULL;
1651 switch (gimple_call_combined_fn (last_stmt))
1653 CASE_CFN_POW:
1654 CASE_CFN_POWI:
1655 break;
1657 default:
1658 return NULL;
1661 base = gimple_call_arg (last_stmt, 0);
1662 exp = gimple_call_arg (last_stmt, 1);
1663 if (TREE_CODE (exp) != REAL_CST
1664 && TREE_CODE (exp) != INTEGER_CST)
1666 if (flag_unsafe_math_optimizations
1667 && TREE_CODE (base) == REAL_CST
1668 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1670 combined_fn log_cfn;
1671 built_in_function exp_bfn;
1672 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1674 case BUILT_IN_POW:
1675 log_cfn = CFN_BUILT_IN_LOG;
1676 exp_bfn = BUILT_IN_EXP;
1677 break;
1678 case BUILT_IN_POWF:
1679 log_cfn = CFN_BUILT_IN_LOGF;
1680 exp_bfn = BUILT_IN_EXPF;
1681 break;
1682 case BUILT_IN_POWL:
1683 log_cfn = CFN_BUILT_IN_LOGL;
1684 exp_bfn = BUILT_IN_EXPL;
1685 break;
1686 default:
1687 return NULL;
1689 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1690 tree exp_decl = builtin_decl_implicit (exp_bfn);
1691 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1692 does that, but if C is a power of 2, we want to use
1693 exp2 (log2 (C) * x) in the non-vectorized version, but for
1694 vectorization we don't have vectorized exp2. */
1695 if (logc
1696 && TREE_CODE (logc) == REAL_CST
1697 && exp_decl
1698 && lookup_attribute ("omp declare simd",
1699 DECL_ATTRIBUTES (exp_decl)))
1701 cgraph_node *node = cgraph_node::get_create (exp_decl);
1702 if (node->simd_clones == NULL)
1704 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1705 || node->definition)
1706 return NULL;
1707 expand_simd_clones (node);
1708 if (node->simd_clones == NULL)
1709 return NULL;
1711 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1712 if (!*type_out)
1713 return NULL;
1714 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1715 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1716 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1717 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1718 g = gimple_build_call (exp_decl, 1, def);
1719 gimple_call_set_lhs (g, res);
1720 return g;
1724 return NULL;
1727 /* We now have a pow or powi builtin function call with a constant
1728 exponent. */
1730 /* Catch squaring. */
1731 if ((tree_fits_shwi_p (exp)
1732 && tree_to_shwi (exp) == 2)
1733 || (TREE_CODE (exp) == REAL_CST
1734 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1736 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1737 TREE_TYPE (base), type_out))
1738 return NULL;
1740 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1741 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1742 return stmt;
1745 /* Catch square root. */
1746 if (TREE_CODE (exp) == REAL_CST
1747 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1749 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1750 if (*type_out
1751 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1752 OPTIMIZE_FOR_SPEED))
1754 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1755 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1756 gimple_call_set_lhs (stmt, var);
1757 gimple_call_set_nothrow (stmt, true);
1758 return stmt;
1762 return NULL;
1766 /* Function vect_recog_widen_sum_pattern
1768 Try to find the following pattern:
1770 type x_t;
1771 TYPE x_T, sum = init;
1772 loop:
1773 sum_0 = phi <init, sum_1>
1774 S1 x_t = *p;
1775 S2 x_T = (TYPE) x_t;
1776 S3 sum_1 = x_T + sum_0;
1778 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1779 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1780 a special case of a reduction computation.
1782 Input:
1784 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1785 when this function is called with S3, the pattern {S2,S3} will be detected.
1787 Output:
1789 * TYPE_OUT: The type of the output of this pattern.
1791 * Return value: A new stmt that will be used to replace the sequence of
1792 stmts that constitute the pattern. In this case it will be:
1793 WIDEN_SUM <x_t, sum_0>
1795 Note: The widening-sum idiom is a widening reduction pattern that is
1796 vectorized without preserving all the intermediate results. It
1797 produces only N/2 (widened) results (by summing up pairs of
1798 intermediate results) rather than all N results. Therefore, we
1799 cannot allow this pattern when we want to get all the results and in
1800 the correct order (as is the case when this computation is in an
1801 inner-loop nested in an outer-loop that is being vectorized). */
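/* Editor's note: a hypothetical scalar form of the widening-sum idiom above
   (names invented for illustration):

     int sum_bytes (const unsigned char *p, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += p[i];    // S2: widen char to int; S3: add to the reduction.
       return sum;
     }

   When the target provides WIDEN_SUM_EXPR, the accumulation is done on pairs
   of widened elements rather than keeping every intermediate result.  */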
1803 static gimple *
1804 vect_recog_widen_sum_pattern (vec_info *vinfo,
1805 stmt_vec_info stmt_vinfo, tree *type_out)
1807 gimple *last_stmt = stmt_vinfo->stmt;
1808 tree oprnd0, oprnd1;
1809 tree type;
1810 gimple *pattern_stmt;
1811 tree var;
1813 /* Look for the following pattern
1814 DX = (TYPE) X;
1815 sum_1 = DX + sum_0;
1816 In which DX is at least double the size of X, and sum_1 has been
1817 recognized as a reduction variable.
1820 /* Starting from LAST_STMT, follow the defs of its uses in search
1821 of the above pattern. */
1823 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1824 &oprnd0, &oprnd1)
1825 || TREE_CODE (oprnd0) != SSA_NAME
1826 || !vinfo->lookup_def (oprnd0))
1827 return NULL;
1829 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1831 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1832 we know that oprnd1 is the reduction variable (defined by a loop-header
1833 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1834 Left to check that oprnd0 is defined by a cast from type 'type' to type
1835 'TYPE'. */
1837 vect_unpromoted_value unprom0;
1838 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1839 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1840 return NULL;
1842 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1844 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1845 unprom0.type, type_out))
1846 return NULL;
1848 var = vect_recog_temp_ssa_var (type, NULL);
1849 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1851 return pattern_stmt;
1854 /* Function vect_recog_bitfield_ref_pattern
1856 Try to find the following pattern:
1858 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
1859 result = (type_out) bf_value;
1861 where type_out is a non-bitfield type, that is to say, its precision matches
1862 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
1864 Input:
1866 * STMT_VINFO: The stmt from which the pattern search begins.
1867 here it starts with:
1868 result = (type_out) bf_value;
1870 Output:
1872 * TYPE_OUT: The vector type of the output of this pattern.
1874 * Return value: A new stmt that will be used to replace the sequence of
1875 stmts that constitute the pattern. If the precision of type_out is bigger
1876 than the precision of the type of _1, we perform the widening before the shifting,
1877 since the new precision will be large enough to shift the value and moving
1878 widening operations up the statement chain enables the generation of
1879 widening loads. If we are widening and the operation after the pattern is
1880 an addition then we mask first and shift later, to enable the generation of
1881 shifting adds. In the case of narrowing we will always mask first, shift
1882 last and then perform a narrowing operation. This will enable the
1883 generation of narrowing shifts.
1885 Widening with mask first, shift later:
1886 container = (type_out) container;
1887 masked = container & (((1 << bitsize) - 1) << bitpos);
1888 result = masked >> bitpos;
1890 Widening with shift first, mask last:
1891 container = (type_out) container;
1892 shifted = container >> bitpos;
1893 result = shifted & ((1 << bitsize) - 1);
1895 Narrowing:
1896 masked = container & (((1 << bitsize) - 1) << bitpos);
1897 result = masked >> bitpos;
1898 result = (type_out) result;
1900 The shifting is always optional depending on whether bitpos != 0.
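/* Editor's note: a hypothetical source-level shape that ends up as the
   BIT_FIELD_REF + conversion sequence above (names invented):

     struct s { unsigned int a : 4; unsigned int b : 12; unsigned int c : 16; };

     void f (struct s *p, unsigned int *out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = p[i].b;   // load container word, BIT_FIELD_REF, widen
     }

   The pattern turns the bitfield extract into shift-and-mask operations on
   the loaded container so the loop can use ordinary vector integer ops.  */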
1904 static gimple *
1905 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
1906 tree *type_out)
1908 gassign *first_stmt = dyn_cast <gassign *> (stmt_info->stmt);
1910 if (!first_stmt)
1911 return NULL;
1913 gassign *bf_stmt;
1914 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt))
1915 && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME)
1917 gimple *second_stmt
1918 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt));
1919 bf_stmt = dyn_cast <gassign *> (second_stmt);
1920 if (!bf_stmt
1921 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
1922 return NULL;
1924 else
1925 return NULL;
1927 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
1928 tree container = TREE_OPERAND (bf_ref, 0);
1930 if (!bit_field_offset (bf_ref).is_constant ()
1931 || !bit_field_size (bf_ref).is_constant ()
1932 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
1933 return NULL;
1935 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
1936 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
1937 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
1938 return NULL;
1940 gimple *use_stmt, *pattern_stmt;
1941 use_operand_p use_p;
1942 tree ret = gimple_assign_lhs (first_stmt);
1943 tree ret_type = TREE_TYPE (ret);
1944 bool shift_first = true;
1945 tree container_type = TREE_TYPE (container);
1946 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
1948 /* Calculate shift_n before the adjustments for widening loads, otherwise
1949 the container may change and we have to consider offset change for
1950 widening loads on big endianness. The shift_n calculated here can be
1951 independent of widening. */
1952 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
1953 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
1954 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
1955 if (BYTES_BIG_ENDIAN)
1956 shift_n = prec - shift_n - mask_width;
1958 /* We move the conversion earlier if the loaded type is smaller than the
1959 return type to enable the use of widening loads. */
1960 if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
1961 && !useless_type_conversion_p (TREE_TYPE (container), ret_type))
1963 pattern_stmt
1964 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
1965 NOP_EXPR, container);
1966 container = gimple_get_lhs (pattern_stmt);
1967 container_type = TREE_TYPE (container);
1968 prec = tree_to_uhwi (TYPE_SIZE (container_type));
1969 vectype = get_vectype_for_scalar_type (vinfo, container_type);
1970 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
1972 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
1973 /* If we are doing the conversion last then also delay the shift as we may
1974 be able to combine the shift and conversion in certain cases. */
1975 shift_first = false;
1977 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
1978 PLUS_EXPR then do the shift last as some targets can combine the shift and
1979 add into a single instruction. */
1980 if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt))
1982 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
1983 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
1984 shift_first = false;
1987 /* If we don't have to shift we only generate the mask, so just fix the
1988 code-path to shift_first. */
1989 if (shift_n == 0)
1990 shift_first = true;
1992 tree result;
1993 if (shift_first)
1995 tree shifted = container;
1996 if (shift_n)
1998 pattern_stmt
1999 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2000 RSHIFT_EXPR, container,
2001 build_int_cst (sizetype, shift_n));
2002 shifted = gimple_assign_lhs (pattern_stmt);
2003 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2006 tree mask = wide_int_to_tree (container_type,
2007 wi::mask (mask_width, false, prec));
2009 pattern_stmt
2010 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2011 BIT_AND_EXPR, shifted, mask);
2012 result = gimple_assign_lhs (pattern_stmt);
2014 else
2016 tree mask = wide_int_to_tree (container_type,
2017 wi::shifted_mask (shift_n, mask_width,
2018 false, prec));
2019 pattern_stmt
2020 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2021 BIT_AND_EXPR, container, mask);
2022 tree masked = gimple_assign_lhs (pattern_stmt);
2024 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2025 pattern_stmt
2026 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2027 RSHIFT_EXPR, masked,
2028 build_int_cst (sizetype, shift_n));
2029 result = gimple_assign_lhs (pattern_stmt);
2032 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2034 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2035 pattern_stmt
2036 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2037 NOP_EXPR, result);
2040 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2041 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2043 return pattern_stmt;
2046 /* Function vect_recog_bit_insert_pattern
2048 Try to find the following pattern:
2050 written = BIT_INSERT_EXPR (container, value, bitpos);
2052 Input:
2054 * STMT_VINFO: The stmt we want to replace.
2056 Output:
2058 * TYPE_OUT: The vector type of the output of this pattern.
2060 * Return value: A new stmt that will be used to replace the sequence of
2061 stmts that constitute the pattern. In this case it will be:
2062 value = (container_type) value; // Make sure the types match.
2063 shifted = value << bitpos; // Shift value into place
2064 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2065 // the 'to-write value'.
2066 cleared = container & ~(mask << bitpos); // Clear, in the container, the
2067 // bit positions that we are
2068 // about to overwrite.
2069 written = cleared | masked; // Write bits.
2072 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2073 bits corresponding to the real size of the bitfield value we are writing to.
2074 The shifting is always optional depending on whether bitpos != 0.
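/* Editor's note: a hypothetical store-side counterpart of the previous
   example (names invented); whether the bitfield write reaches this pattern
   as a BIT_INSERT_EXPR depends on the earlier lowering passes.

     struct s { unsigned int a : 4; unsigned int b : 12; unsigned int c : 16; };

     void f (struct s *p, const unsigned short *val, int n)
     {
       for (int i = 0; i < n; i++)
         p[i].b = val[i];   // BIT_INSERT_EXPR into the container word
     }

   The pattern expands the insertion into the shift/mask/ior sequence shown
   above, operating on whole container words.  */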
2078 static gimple *
2079 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2080 tree *type_out)
2082 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2083 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2084 return NULL;
2086 tree container = gimple_assign_rhs1 (bf_stmt);
2087 tree value = gimple_assign_rhs2 (bf_stmt);
2088 tree shift = gimple_assign_rhs3 (bf_stmt);
2090 tree bf_type = TREE_TYPE (value);
2091 tree container_type = TREE_TYPE (container);
2093 if (!INTEGRAL_TYPE_P (container_type)
2094 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2095 return NULL;
2097 gimple *pattern_stmt;
2099 vect_unpromoted_value unprom;
2100 unprom.set_op (value, vect_internal_def);
2101 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2102 get_vectype_for_scalar_type (vinfo,
2103 container_type));
2105 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2106 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2107 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2108 if (BYTES_BIG_ENDIAN)
2110 shift_n = prec - shift_n - mask_width;
2111 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2114 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2116 pattern_stmt =
2117 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2118 NOP_EXPR, value);
2119 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2120 value = gimple_get_lhs (pattern_stmt);
2123 /* Shift VALUE into place. */
2124 tree shifted = value;
2125 if (shift_n)
2127 gimple_seq stmts = NULL;
2128 shifted
2129 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2130 if (!gimple_seq_empty_p (stmts))
2131 append_pattern_def_seq (vinfo, stmt_info,
2132 gimple_seq_first_stmt (stmts));
2135 tree mask_t
2136 = wide_int_to_tree (container_type,
2137 wi::shifted_mask (shift_n, mask_width, false, prec));
2139 /* Clear bits we don't want to write back from SHIFTED. */
2140 gimple_seq stmts = NULL;
2141 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2142 mask_t);
2143 if (!gimple_seq_empty_p (stmts))
2145 pattern_stmt = gimple_seq_first_stmt (stmts);
2146 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2149 /* Mask off the bits in the container that we are to write to. */
2150 mask_t = wide_int_to_tree (container_type,
2151 wi::shifted_mask (shift_n, mask_width, true, prec));
2152 tree cleared = vect_recog_temp_ssa_var (container_type);
2153 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2154 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2156 /* Write MASKED into CLEARED. */
2157 pattern_stmt
2158 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2159 BIT_IOR_EXPR, cleared, masked);
2161 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2162 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2164 return pattern_stmt;
2168 /* Recognize cases in which an operation is performed in one type WTYPE
2169 but could be done more efficiently in a narrower type NTYPE. For example,
2170 if we have:
2172 ATYPE a; // narrower than NTYPE
2173 BTYPE b; // narrower than NTYPE
2174 WTYPE aw = (WTYPE) a;
2175 WTYPE bw = (WTYPE) b;
2176 WTYPE res = aw + bw; // only uses of aw and bw
2178 then it would be more efficient to do:
2180 NTYPE an = (NTYPE) a;
2181 NTYPE bn = (NTYPE) b;
2182 NTYPE resn = an + bn;
2183 WTYPE res = (WTYPE) resn;
2185 Other situations include things like:
2187 ATYPE a; // NTYPE or narrower
2188 WTYPE aw = (WTYPE) a;
2189 WTYPE res = aw + b;
2191 when only "(NTYPE) res" is significant. In that case it's more efficient
2192 to truncate "b" and do the operation on NTYPE instead:
2194 NTYPE an = (NTYPE) a;
2195 NTYPE bn = (NTYPE) b; // truncation
2196 NTYPE resn = an + bn;
2197 WTYPE res = (WTYPE) resn;
2199 All users of "res" should then use "resn" instead, making the final
2200 statement dead (not marked as relevant). The final statement is still
2201 needed to maintain the type correctness of the IR.
2203 vect_determine_precisions has already determined the minimum
2204 precision of the operation and the minimum precision required
2205 by users of the result. */
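/* Editor's note: a hypothetical example of an over-widened operation
   (names invented):

     void f (unsigned char *restrict d, const unsigned char *restrict a,
             const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = a[i] + b[i];   // C promotion does the add in int,
                               // but only 8 bits of the result are kept
     }

   The precision analysis referred to above lets the addition be narrowed so
   that, e.g., sixteen chars can be added per 128-bit vector instead of
   four ints.  */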
2207 static gimple *
2208 vect_recog_over_widening_pattern (vec_info *vinfo,
2209 stmt_vec_info last_stmt_info, tree *type_out)
2211 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2212 if (!last_stmt)
2213 return NULL;
2215 /* See whether we have found that this operation can be done on a
2216 narrower type without changing its semantics. */
2217 unsigned int new_precision = last_stmt_info->operation_precision;
2218 if (!new_precision)
2219 return NULL;
2221 tree lhs = gimple_assign_lhs (last_stmt);
2222 tree type = TREE_TYPE (lhs);
2223 tree_code code = gimple_assign_rhs_code (last_stmt);
2225 /* Punt for reductions where we don't handle the type conversions. */
2226 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2227 return NULL;
2229 /* Keep the first operand of a COND_EXPR as-is: only the other two
2230 operands are interesting. */
2231 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2233 /* Check the operands. */
2234 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
2235 auto_vec <vect_unpromoted_value, 3> unprom (nops);
2236 unprom.quick_grow (nops);
2237 unsigned int min_precision = 0;
2238 bool single_use_p = false;
2239 for (unsigned int i = 0; i < nops; ++i)
2241 tree op = gimple_op (last_stmt, first_op + i);
2242 if (TREE_CODE (op) == INTEGER_CST)
2243 unprom[i].set_op (op, vect_constant_def);
2244 else if (TREE_CODE (op) == SSA_NAME)
2246 bool op_single_use_p = true;
2247 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
2248 &op_single_use_p))
2249 return NULL;
2250 /* If:
2252 (1) N bits of the result are needed;
2253 (2) all inputs are widened from M<N bits; and
2254 (3) one operand OP is a single-use SSA name
2256 we can shift the M->N widening from OP to the output
2257 without changing the number or type of extensions involved.
2258 This then reduces the number of copies of STMT_INFO.
2260 If instead of (3) more than one operand is a single-use SSA name,
2261 shifting the extension to the output is even more of a win.
2263 If instead:
2265 (1) N bits of the result are needed;
2266 (2) one operand OP2 is widened from M2<N bits;
2267 (3) another operand OP1 is widened from M1<M2 bits; and
2268 (4) both OP1 and OP2 are single-use
2270 the choice is between:
2272 (a) truncating OP2 to M1, doing the operation on M1,
2273 and then widening the result to N
2275 (b) widening OP1 to M2, doing the operation on M2, and then
2276 widening the result to N
2278 Both shift the M2->N widening of the inputs to the output.
2279 (a) additionally shifts the M1->M2 widening to the output;
2280 it requires fewer copies of STMT_INFO but requires an extra
2281 M2->M1 truncation.
2283 Which is better will depend on the complexity and cost of
2284 STMT_INFO, which is hard to predict at this stage. However,
2285 a clear tie-breaker in favor of (b) is the fact that the
2286 truncation in (a) increases the length of the operation chain.
2288 If instead of (4) only one of OP1 or OP2 is single-use,
2289 (b) is still a win over doing the operation in N bits:
2290 it still shifts the M2->N widening on the single-use operand
2291 to the output and reduces the number of STMT_INFO copies.
2293 If neither operand is single-use then operating on fewer than
2294 N bits might lead to more extensions overall. Whether it does
2295 or not depends on global information about the vectorization
2296 region, and whether that's a good trade-off would again
2297 depend on the complexity and cost of the statements involved,
2298 as well as things like register pressure that are not normally
2299 modelled at this stage. We therefore ignore these cases
2300 and just optimize the clear single-use wins above.
2302 Thus we take the maximum precision of the unpromoted operands
2303 and record whether any operand is single-use. */
2304 if (unprom[i].dt == vect_internal_def)
2306 min_precision = MAX (min_precision,
2307 TYPE_PRECISION (unprom[i].type));
2308 single_use_p |= op_single_use_p;
2311 else
2312 return NULL;
2315 /* Although the operation could be done in operation_precision, we have
2316 to balance that against introducing extra truncations or extensions.
2317 Calculate the minimum precision that can be handled efficiently.
2319 The loop above determined that the operation could be handled
2320 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
2321 extension from the inputs to the output without introducing more
2322 instructions, and would reduce the number of instructions required
2323 for STMT_INFO itself.
2325 vect_determine_precisions has also determined that the result only
2326 needs min_output_precision bits. Truncating by a factor of N times
2327 requires a tree of N - 1 instructions, so if TYPE is N times wider
2328 than min_output_precision, doing the operation in TYPE and truncating
2329 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
2330 In contrast:
2332 - truncating the input to a unary operation and doing the operation
2333 in the new type requires at most N - 1 + 1 = N instructions per
2334 output vector
2336 - doing the same for a binary operation requires at most
2337 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
2339 Both unary and binary operations require fewer instructions than
2340 this if the operands were extended from a suitable truncated form.
2341 Thus there is usually nothing to lose by doing operations in
2342 min_output_precision bits, but there can be something to gain. */
2343 if (!single_use_p)
2344 min_precision = last_stmt_info->min_output_precision;
2345 else
2346 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
2348 /* Apply the minimum efficient precision we just calculated. */
2349 if (new_precision < min_precision)
2350 new_precision = min_precision;
2351 new_precision = vect_element_precision (new_precision);
2352 if (new_precision >= TYPE_PRECISION (type))
2353 return NULL;
2355 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
2357 *type_out = get_vectype_for_scalar_type (vinfo, type);
2358 if (!*type_out)
2359 return NULL;
2361 /* We've found a viable pattern. Get the new type of the operation. */
2362 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
2363 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2365 /* If we're truncating an operation, we need to make sure that we
2366 don't introduce new undefined overflow. The codes tested here are
2367 a subset of those accepted by vect_truncatable_operation_p. */
2368 tree op_type = new_type;
2369 if (TYPE_OVERFLOW_UNDEFINED (new_type)
2370 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2371 op_type = build_nonstandard_integer_type (new_precision, true);
2373 /* We specifically don't check here whether the target supports the
2374 new operation, since it might be something that a later pattern
2375 wants to rewrite anyway. If targets have a minimum element size
2376 for some optabs, we should pattern-match smaller ops to larger ops
2377 where beneficial. */
2378 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2379 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2380 if (!new_vectype || !op_vectype)
2381 return NULL;
2383 if (dump_enabled_p ())
2384 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2385 type, new_type);
2387 /* Calculate the rhs operands for an operation on OP_TYPE. */
2388 tree ops[3] = {};
2389 for (unsigned int i = 1; i < first_op; ++i)
2390 ops[i - 1] = gimple_op (last_stmt, i);
2391 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2392 op_type, &unprom[0], op_vectype);
2394 /* Use the operation to produce a result of type OP_TYPE. */
2395 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2396 gimple *pattern_stmt = gimple_build_assign (new_var, code,
2397 ops[0], ops[1], ops[2]);
2398 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_NOTE, vect_location,
2402 "created pattern stmt: %G", pattern_stmt);
2404 /* Convert back to the original signedness, if OP_TYPE is different
2405 from NEW_TYPE. */
2406 if (op_type != new_type)
2407 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2408 pattern_stmt, op_vectype);
2410 /* Promote the result to the original type. */
2411 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2412 pattern_stmt, new_vectype);
2414 return pattern_stmt;
2417 /* Recognize the following patterns:
2419 ATYPE a; // narrower than TYPE
2420 BTYPE b; // narrower than TYPE
2422 1) Multiply high with scaling
2423 TYPE res = ((TYPE) a * (TYPE) b) >> c;
2424 Here, c is bitsize (TYPE) / 2 - 1.
2426 2) ... or also with rounding
2427 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
2428 Here, d is bitsize (TYPE) / 2 - 2.
2430 3) Normal multiply high
2431 TYPE res = ((TYPE) a * (TYPE) b) >> e;
2432 Here, e is bitsize (TYPE) / 2.
2434 where only the bottom half of res is used. */
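/* Editor's note: a hypothetical scalar form of pattern 3) above
   (names invented):

     void f (short *restrict d, const short *restrict a,
             const short *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = ((int) a[i] * b[i]) >> 16;   // only the low 16 bits are kept
     }

   which maps to IFN_MULH on 16-bit elements; shifting by 15 instead gives
   IFN_MULHS, and the rounding form of pattern 2) (shift by 14, add 1,
   shift by 1) gives IFN_MULHRS.  */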
2436 static gimple *
2437 vect_recog_mulhs_pattern (vec_info *vinfo,
2438 stmt_vec_info last_stmt_info, tree *type_out)
2440 /* Check for a right shift. */
2441 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2442 if (!last_stmt
2443 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2444 return NULL;
2446 /* Check that the shift result is wider than the users of the
2447 result need (i.e. that narrowing would be a natural choice). */
2448 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2449 unsigned int target_precision
2450 = vect_element_precision (last_stmt_info->min_output_precision);
2451 if (!INTEGRAL_TYPE_P (lhs_type)
2452 || target_precision >= TYPE_PRECISION (lhs_type))
2453 return NULL;
2455 /* Look through any change in sign on the outer shift input. */
2456 vect_unpromoted_value unprom_rshift_input;
2457 tree rshift_input = vect_look_through_possible_promotion
2458 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2459 if (!rshift_input
2460 || TYPE_PRECISION (TREE_TYPE (rshift_input))
2461 != TYPE_PRECISION (lhs_type))
2462 return NULL;
2464 /* Get the definition of the shift input. */
2465 stmt_vec_info rshift_input_stmt_info
2466 = vect_get_internal_def (vinfo, rshift_input);
2467 if (!rshift_input_stmt_info)
2468 return NULL;
2469 gassign *rshift_input_stmt
2470 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2471 if (!rshift_input_stmt)
2472 return NULL;
2474 stmt_vec_info mulh_stmt_info;
2475 tree scale_term;
2476 bool rounding_p = false;
2478 /* Check for the presence of the rounding term. */
2479 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2481 /* Check that the outer shift was by 1. */
2482 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2483 return NULL;
2485 /* Check that the second operand of the PLUS_EXPR is 1. */
2486 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2487 return NULL;
2489 /* Look through any change in sign on the addition input. */
2490 vect_unpromoted_value unprom_plus_input;
2491 tree plus_input = vect_look_through_possible_promotion
2492 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2493 if (!plus_input
2494 || TYPE_PRECISION (TREE_TYPE (plus_input))
2495 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2496 return NULL;
2498 /* Get the definition of the multiply-high-scale part. */
2499 stmt_vec_info plus_input_stmt_info
2500 = vect_get_internal_def (vinfo, plus_input);
2501 if (!plus_input_stmt_info)
2502 return NULL;
2503 gassign *plus_input_stmt
2504 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2505 if (!plus_input_stmt
2506 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2507 return NULL;
2509 /* Look through any change in sign on the scaling input. */
2510 vect_unpromoted_value unprom_scale_input;
2511 tree scale_input = vect_look_through_possible_promotion
2512 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2513 if (!scale_input
2514 || TYPE_PRECISION (TREE_TYPE (scale_input))
2515 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2516 return NULL;
2518 /* Get the definition of the multiply-high part. */
2519 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2520 if (!mulh_stmt_info)
2521 return NULL;
2523 /* Get the scaling term. */
2524 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2525 rounding_p = true;
2527 else
2529 mulh_stmt_info = rshift_input_stmt_info;
2530 scale_term = gimple_assign_rhs2 (last_stmt);
2533 /* Check that the scaling factor is constant. */
2534 if (TREE_CODE (scale_term) != INTEGER_CST)
2535 return NULL;
2537 /* Check whether the scaling input term can be seen as two widened
2538 inputs multiplied together. */
2539 vect_unpromoted_value unprom_mult[2];
2540 tree new_type;
2541 unsigned int nops
2542 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2543 false, 2, unprom_mult, &new_type);
2544 if (nops != 2)
2545 return NULL;
2547 /* Adjust output precision. */
2548 if (TYPE_PRECISION (new_type) < target_precision)
2549 new_type = build_nonstandard_integer_type
2550 (target_precision, TYPE_UNSIGNED (new_type));
2552 unsigned mult_precision = TYPE_PRECISION (new_type);
2553 internal_fn ifn;
2554 /* Check that the scaling factor is expected. Instead of
2555 target_precision, we should use the one that we actually
2556 use for the internal function. */
2557 if (rounding_p)
2559 /* Check pattern 2). */
2560 if (wi::to_widest (scale_term) + mult_precision + 2
2561 != TYPE_PRECISION (lhs_type))
2562 return NULL;
2564 ifn = IFN_MULHRS;
2566 else
2568 /* Check for pattern 1). */
2569 if (wi::to_widest (scale_term) + mult_precision + 1
2570 == TYPE_PRECISION (lhs_type))
2571 ifn = IFN_MULHS;
2572 /* Check for pattern 3). */
2573 else if (wi::to_widest (scale_term) + mult_precision
2574 == TYPE_PRECISION (lhs_type))
2575 ifn = IFN_MULH;
2576 else
2577 return NULL;
2580 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2582 /* Check for target support. */
2583 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2584 if (!new_vectype
2585 || !direct_internal_fn_supported_p
2586 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2587 return NULL;
2589 /* The IR requires a valid vector type for the cast result, even though
2590 it's likely to be discarded. */
2591 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2592 if (!*type_out)
2593 return NULL;
2595 /* Generate the IFN_MULHRS call. */
2596 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2597 tree new_ops[2];
2598 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2599 unprom_mult, new_vectype);
2600 gcall *mulhrs_stmt
2601 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2602 gimple_call_set_lhs (mulhrs_stmt, new_var);
2603 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2605 if (dump_enabled_p ())
2606 dump_printf_loc (MSG_NOTE, vect_location,
2607 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
2609 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2610 mulhrs_stmt, new_vectype);
2613 /* Recognize the patterns:
2615 ATYPE a; // narrower than TYPE
2616 BTYPE b; // narrower than TYPE
2617 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2618 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2620 where only the bottom half of avg is used. Try to transform them into:
2622 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2623 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2625 followed by:
2627 TYPE avg = (TYPE) avg';
2629 where NTYPE is no wider than half of TYPE. Since only the bottom half
2630 of avg is used, all or part of the cast of avg' should become redundant.
2632 If there is no target support available, generate code to distribute rshift
2633 over plus and add a carry. */
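/* Editor's note: a hypothetical scalar form of variant (2) above
   (names invented):

     void f (unsigned char *restrict d, const unsigned char *restrict a,
             const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = (a[i] + b[i] + 1) >> 1;   // rounding average, done in int
     }

   becomes .AVG_CEIL on unsigned char elements when the target supports it;
   dropping the "+ 1" gives the .AVG_FLOOR form, and without target support
   the shift-and-carry fallback described above is emitted instead.  */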
2635 static gimple *
2636 vect_recog_average_pattern (vec_info *vinfo,
2637 stmt_vec_info last_stmt_info, tree *type_out)
2639 /* Check for a shift right by one bit. */
2640 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2641 if (!last_stmt
2642 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2643 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2644 return NULL;
2646 /* Check that the shift result is wider than the users of the
2647 result need (i.e. that narrowing would be a natural choice). */
2648 tree lhs = gimple_assign_lhs (last_stmt);
2649 tree type = TREE_TYPE (lhs);
2650 unsigned int target_precision
2651 = vect_element_precision (last_stmt_info->min_output_precision);
2652 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2653 return NULL;
2655 /* Look through any change in sign on the shift input. */
2656 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2657 vect_unpromoted_value unprom_plus;
2658 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2659 &unprom_plus);
2660 if (!rshift_rhs
2661 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2662 return NULL;
2664 /* Get the definition of the shift input. */
2665 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2666 if (!plus_stmt_info)
2667 return NULL;
2669 /* Check whether the shift input can be seen as a tree of additions on
2670 2 or 3 widened inputs.
2672 Note that the pattern should be a win even if the result of one or
2673 more additions is reused elsewhere: if the pattern matches, we'd be
2674 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2675 internal_fn ifn = IFN_AVG_FLOOR;
2676 vect_unpromoted_value unprom[3];
2677 tree new_type;
2678 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2679 WIDEN_PLUS_EXPR, false, 3,
2680 unprom, &new_type);
2681 if (nops == 0)
2682 return NULL;
2683 if (nops == 3)
2685 /* Check that one operand is 1. */
2686 unsigned int i;
2687 for (i = 0; i < 3; ++i)
2688 if (integer_onep (unprom[i].op))
2689 break;
2690 if (i == 3)
2691 return NULL;
2692 /* Throw away the 1 operand and keep the other two. */
2693 if (i < 2)
2694 unprom[i] = unprom[2];
2695 ifn = IFN_AVG_CEIL;
2698 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2700 /* We know that:
2702 (a) the operation can be viewed as:
2704 TYPE widened0 = (TYPE) UNPROM[0];
2705 TYPE widened1 = (TYPE) UNPROM[1];
2706 TYPE tmp1 = widened0 + widened1 {+ 1};
2707 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2709 (b) the first two statements are equivalent to:
2711 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2712 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2714 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2715 where sensible;
2717 (d) all the operations can be performed correctly at twice the width of
2718 NEW_TYPE, due to the nature of the average operation; and
2720 (e) users of the result of the right shift need only TARGET_PRECISION
2721 bits, where TARGET_PRECISION is no more than half of TYPE's
2722 precision.
2724 Under these circumstances, the only situation in which NEW_TYPE
2725 could be narrower than TARGET_PRECISION is if widened0, widened1
2726 and an addition result are all used more than once. Thus we can
2727 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2728 as "free", whereas widening the result of the average instruction
2729 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2730 therefore better not to go narrower than TARGET_PRECISION. */
2731 if (TYPE_PRECISION (new_type) < target_precision)
2732 new_type = build_nonstandard_integer_type (target_precision,
2733 TYPE_UNSIGNED (new_type));
2735 /* Check for target support. */
2736 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2737 if (!new_vectype)
2738 return NULL;
2740 bool fallback_p = false;
2742 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2744 else if (TYPE_UNSIGNED (new_type)
2745 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2746 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2747 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2748 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2749 fallback_p = true;
2750 else
2751 return NULL;
2753 /* The IR requires a valid vector type for the cast result, even though
2754 it's likely to be discarded. */
2755 *type_out = get_vectype_for_scalar_type (vinfo, type);
2756 if (!*type_out)
2757 return NULL;
2759 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2760 tree new_ops[2];
2761 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2762 unprom, new_vectype);
2764 if (fallback_p)
2766 /* As a fallback, generate code for the following sequence:
2768 shifted_op0 = new_ops[0] >> 1;
2769 shifted_op1 = new_ops[1] >> 1;
2770 sum_of_shifted = shifted_op0 + shifted_op1;
2771 unmasked_carry = new_ops[0] and/or new_ops[1];
2772 carry = unmasked_carry & 1;
2773 new_var = sum_of_shifted + carry;
2776 tree one_cst = build_one_cst (new_type);
2777 gassign *g;
2779 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2780 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2781 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2783 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2784 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2785 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2787 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2788 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2789 shifted_op0, shifted_op1);
2790 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2792 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2793 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2794 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2795 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2797 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2798 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2799 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2801 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2802 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2805 /* Generate the IFN_AVG* call. */
2806 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2807 new_ops[1]);
2808 gimple_call_set_lhs (average_stmt, new_var);
2809 gimple_set_location (average_stmt, gimple_location (last_stmt));
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_NOTE, vect_location,
2813 "created pattern stmt: %G", (gimple *) average_stmt);
2815 return vect_convert_output (vinfo, last_stmt_info,
2816 type, average_stmt, new_vectype);
2819 /* Recognize cases in which the input to a cast is wider than its
2820 output, and the input is fed by a widening operation. Fold this
2821 by removing the unnecessary intermediate widening. E.g.:
2823 unsigned char a;
2824 unsigned int b = (unsigned int) a;
2825 unsigned short c = (unsigned short) b;
2827 =>
2829 unsigned short c = (unsigned short) a;
2831 Although this is rare in input IR, it is an expected side-effect
2832 of the over-widening pattern above.
2834 This is beneficial also for integer-to-float conversions, if the
2835 widened integer has more bits than the float, and if the unwidened
2836 input doesn't. */
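/* Editor's note: a hypothetical example of the integer-to-float case
   mentioned above (names invented):

     void f (float *restrict d, const unsigned char *restrict a, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = (float) (unsigned long long) a[i];   // 64-bit intermediate
     }

   The 64-bit widening is wider than the float result while the original
   unsigned char is not, so converting the unpromoted value directly to
   float gives the same result without the intermediate extension.  */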
2838 static gimple *
2839 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2840 stmt_vec_info last_stmt_info, tree *type_out)
2842 /* Check for a cast, including an integer-to-float conversion. */
2843 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2844 if (!last_stmt)
2845 return NULL;
2846 tree_code code = gimple_assign_rhs_code (last_stmt);
2847 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2848 return NULL;
2850 /* Make sure that the rhs is a scalar with a natural bitsize. */
2851 tree lhs = gimple_assign_lhs (last_stmt);
2852 if (!lhs)
2853 return NULL;
2854 tree lhs_type = TREE_TYPE (lhs);
2855 scalar_mode lhs_mode;
2856 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2857 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2858 return NULL;
2860 /* Check for a narrowing operation (from a vector point of view). */
2861 tree rhs = gimple_assign_rhs1 (last_stmt);
2862 tree rhs_type = TREE_TYPE (rhs);
2863 if (!INTEGRAL_TYPE_P (rhs_type)
2864 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2865 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2866 return NULL;
2868 /* Try to find an unpromoted input. */
2869 vect_unpromoted_value unprom;
2870 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2871 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2872 return NULL;
2874 /* If the bits above RHS_TYPE matter, make sure that they're the
2875 same when extending from UNPROM as they are when extending from RHS. */
2876 if (!INTEGRAL_TYPE_P (lhs_type)
2877 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2878 return NULL;
2880 /* We can get the same result by casting UNPROM directly, to avoid
2881 the unnecessary widening and narrowing. */
2882 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2884 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2885 if (!*type_out)
2886 return NULL;
2888 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2889 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2890 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2892 return pattern_stmt;
2895 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2896 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2898 static gimple *
2899 vect_recog_widen_shift_pattern (vec_info *vinfo,
2900 stmt_vec_info last_stmt_info, tree *type_out)
2902 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2903 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2904 "vect_recog_widen_shift_pattern");
2907 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2909 type a_t, b_t, c_t;
2911 S0 a_t = b_t r<< c_t;
2913 Input/Output:
2915 * STMT_VINFO: The stmt from which the pattern search begins,
2916 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2917 with a sequence:
2919 S1 d_t = -c_t;
2920 S2 e_t = d_t & (B - 1);
2921 S3 f_t = b_t << c_t;
2922 S4 g_t = b_t >> e_t;
2923 S0 a_t = f_t | g_t;
2925 where B is element bitsize of type.
2927 Output:
2929 * TYPE_OUT: The type of the output of this pattern.
2931 * Return value: A new stmt that will be used to replace the rotate
2932 S0 stmt. */
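/* Editor's note: a hypothetical source-level rotate (names invented);
   earlier folding typically turns the open-coded form into LROTATE_EXPR
   before this pattern sees it.

     void f (unsigned int *restrict d, const unsigned int *restrict c, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = (d[i] << c[i]) | (d[i] >> ((-c[i]) & 31));   // rotate left
     }

   On targets without a vector rotate, the S1..S4/S0 sequence above re-expands
   the rotate using the vector shift and bitwise-or operations that are
   available.  */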
2934 static gimple *
2935 vect_recog_rotate_pattern (vec_info *vinfo,
2936 stmt_vec_info stmt_vinfo, tree *type_out)
2938 gimple *last_stmt = stmt_vinfo->stmt;
2939 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2940 gimple *pattern_stmt, *def_stmt;
2941 enum tree_code rhs_code;
2942 enum vect_def_type dt;
2943 optab optab1, optab2;
2944 edge ext_def = NULL;
2945 bool bswap16_p = false;
2947 if (is_gimple_assign (last_stmt))
2949 rhs_code = gimple_assign_rhs_code (last_stmt);
2950 switch (rhs_code)
2952 case LROTATE_EXPR:
2953 case RROTATE_EXPR:
2954 break;
2955 default:
2956 return NULL;
2959 lhs = gimple_assign_lhs (last_stmt);
2960 oprnd0 = gimple_assign_rhs1 (last_stmt);
2961 type = TREE_TYPE (oprnd0);
2962 oprnd1 = gimple_assign_rhs2 (last_stmt);
2964 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2966 /* __builtin_bswap16 (x) is another form of x r>> 8.
2967 The vectorizer has bswap support, but only if the argument isn't
2968 promoted. */
2969 lhs = gimple_call_lhs (last_stmt);
2970 oprnd0 = gimple_call_arg (last_stmt, 0);
2971 type = TREE_TYPE (oprnd0);
2972 if (!lhs
2973 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2974 || TYPE_PRECISION (type) <= 16
2975 || TREE_CODE (oprnd0) != SSA_NAME
2976 || BITS_PER_UNIT != 8)
2977 return NULL;
2979 stmt_vec_info def_stmt_info;
2980 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2981 return NULL;
2983 if (dt != vect_internal_def)
2984 return NULL;
2986 if (gimple_assign_cast_p (def_stmt))
2988 def = gimple_assign_rhs1 (def_stmt);
2989 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2990 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2991 oprnd0 = def;
2994 type = TREE_TYPE (lhs);
2995 vectype = get_vectype_for_scalar_type (vinfo, type);
2996 if (vectype == NULL_TREE)
2997 return NULL;
2999 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3001 /* The encoding uses one stepped pattern for each byte in the
3002 16-bit word. */
3003 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3004 for (unsigned i = 0; i < 3; ++i)
3005 for (unsigned j = 0; j < 2; ++j)
3006 elts.quick_push ((i + 1) * 2 - j - 1);
3008 vec_perm_indices indices (elts, 1,
3009 TYPE_VECTOR_SUBPARTS (char_vectype));
3010 machine_mode vmode = TYPE_MODE (char_vectype);
3011 if (can_vec_perm_const_p (vmode, vmode, indices))
3013 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3014 undo the argument promotion. */
3015 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3017 def = vect_recog_temp_ssa_var (type, NULL);
3018 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3019 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3020 oprnd0 = def;
3023 /* Pattern detected. */
3024 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3026 *type_out = vectype;
3028 /* Pattern supported. Create a stmt to be used to replace the
3029 pattern, with the unpromoted argument. */
3030 var = vect_recog_temp_ssa_var (type, NULL);
3031 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3032 1, oprnd0);
3033 gimple_call_set_lhs (pattern_stmt, var);
3034 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3035 gimple_call_fntype (last_stmt));
3036 return pattern_stmt;
3040 oprnd1 = build_int_cst (integer_type_node, 8);
3041 rhs_code = LROTATE_EXPR;
3042 bswap16_p = true;
3044 else
3045 return NULL;
3047 if (TREE_CODE (oprnd0) != SSA_NAME
3048 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
3049 || !INTEGRAL_TYPE_P (type))
3050 return NULL;
3052 stmt_vec_info def_stmt_info;
3053 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3054 return NULL;
3056 if (dt != vect_internal_def
3057 && dt != vect_constant_def
3058 && dt != vect_external_def)
3059 return NULL;
3061 vectype = get_vectype_for_scalar_type (vinfo, type);
3062 if (vectype == NULL_TREE)
3063 return NULL;
3065 /* If vector/vector or vector/scalar rotate is supported by the target,
3066 don't do anything here. */
3067 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3068 if (optab1
3069 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3071 use_rotate:
3072 if (bswap16_p)
3074 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3076 def = vect_recog_temp_ssa_var (type, NULL);
3077 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3078 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3079 oprnd0 = def;
3082 /* Pattern detected. */
3083 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3085 *type_out = vectype;
3087 /* Pattern supported. Create a stmt to be used to replace the
3088 pattern. */
3089 var = vect_recog_temp_ssa_var (type, NULL);
3090 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3091 oprnd1);
3092 return pattern_stmt;
3094 return NULL;
3097 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3099 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3100 if (optab2
3101 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3102 goto use_rotate;
3105 tree utype = unsigned_type_for (type);
3106 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3107 if (!uvectype)
3108 return NULL;
3110 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3111 don't do anything here either. */
3112 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3113 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3114 if (!optab1
3115 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3116 || !optab2
3117 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3119 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3120 return NULL;
3121 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3122 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3123 if (!optab1
3124 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3125 || !optab2
3126 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3127 return NULL;
3130 *type_out = vectype;
3132 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3134 def = vect_recog_temp_ssa_var (utype, NULL);
3135 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3136 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3137 oprnd0 = def;
3140 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3141 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3143 def = NULL_TREE;
3144 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3145 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3146 def = oprnd1;
3147 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3149 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3150 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3151 && TYPE_PRECISION (TREE_TYPE (rhs1))
3152 == TYPE_PRECISION (type))
3153 def = rhs1;
3156 if (def == NULL_TREE)
3158 def = vect_recog_temp_ssa_var (utype, NULL);
3159 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3160 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3162 stype = TREE_TYPE (def);
3164 if (TREE_CODE (def) == INTEGER_CST)
3166 if (!tree_fits_uhwi_p (def)
3167 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3168 || integer_zerop (def))
3169 return NULL;
3170 def2 = build_int_cst (stype,
3171 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3173 else
3175 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3177 if (vecstype == NULL_TREE)
3178 return NULL;
3179 def2 = vect_recog_temp_ssa_var (stype, NULL);
3180 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3181 if (ext_def)
3183 basic_block new_bb
3184 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3185 gcc_assert (!new_bb);
3187 else
3188 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3190 def2 = vect_recog_temp_ssa_var (stype, NULL);
3191 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3192 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3193 gimple_assign_lhs (def_stmt), mask);
3194 if (ext_def)
3196 basic_block new_bb
3197 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3198 gcc_assert (!new_bb);
3200 else
3201 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3204 var1 = vect_recog_temp_ssa_var (utype, NULL);
3205 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3206 ? LSHIFT_EXPR : RSHIFT_EXPR,
3207 oprnd0, def);
3208 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3210 var2 = vect_recog_temp_ssa_var (utype, NULL);
3211 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3212 ? RSHIFT_EXPR : LSHIFT_EXPR,
3213 oprnd0, def2);
3214 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3216 /* Pattern detected. */
3217 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3219 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3220 var = vect_recog_temp_ssa_var (utype, NULL);
3221 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3223 if (!useless_type_conversion_p (type, utype))
3225 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
3226 tree result = vect_recog_temp_ssa_var (type, NULL);
3227 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
3229 return pattern_stmt;
3232 /* Detect a vector by vector shift pattern that wouldn't otherwise be
3233 vectorized:
3235 type a_t;
3236 TYPE b_T, res_T;
3238 S1 a_t = ;
3239 S2 b_T = ;
3240 S3 res_T = b_T op a_t;
3242 where type 'TYPE' is a type with different size than 'type',
3243 and op is <<, >> or rotate.
3245 Also detect cases:
3247 type a_t;
3248 TYPE b_T, c_T, res_T;
3250 S0 c_T = ;
3251 S1 a_t = (type) c_T;
3252 S2 b_T = ;
3253 S3 res_T = b_T op a_t;
3255 Input/Output:
3257 * STMT_VINFO: The stmt from which the pattern search begins,
3258 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
3259 with a shift/rotate which has same type on both operands, in the
3260 second case just b_T op c_T, in the first case with added cast
3261 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
3263 Output:
3265 * TYPE_OUT: The type of the output of this pattern.
3267 * Return value: A new stmt that will be used to replace the shift/rotate
3268 S3 stmt. */
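/* Editor's note: a hypothetical example of the mismatched-operand shift
   (names invented):

     void f (unsigned long long *restrict d, const unsigned char *restrict amt,
             int n)
     {
       for (int i = 0; i < n; i++)
         d[i] <<= amt[i];   // 64-bit value shifted by a count promoted to int
     }

   The shift count has a different mode from the shifted value, so the pattern
   adds a cast of the count (to the value's type) to the pattern def sequence
   and emits a shift whose operands have matching types.  */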
3270 static gimple *
3271 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
3272 stmt_vec_info stmt_vinfo,
3273 tree *type_out)
3275 gimple *last_stmt = stmt_vinfo->stmt;
3276 tree oprnd0, oprnd1, lhs, var;
3277 gimple *pattern_stmt;
3278 enum tree_code rhs_code;
3280 if (!is_gimple_assign (last_stmt))
3281 return NULL;
3283 rhs_code = gimple_assign_rhs_code (last_stmt);
3284 switch (rhs_code)
3286 case LSHIFT_EXPR:
3287 case RSHIFT_EXPR:
3288 case LROTATE_EXPR:
3289 case RROTATE_EXPR:
3290 break;
3291 default:
3292 return NULL;
3295 lhs = gimple_assign_lhs (last_stmt);
3296 oprnd0 = gimple_assign_rhs1 (last_stmt);
3297 oprnd1 = gimple_assign_rhs2 (last_stmt);
3298 if (TREE_CODE (oprnd0) != SSA_NAME
3299 || TREE_CODE (oprnd1) != SSA_NAME
3300 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
3301 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
3302 || TYPE_PRECISION (TREE_TYPE (lhs))
3303 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
3304 return NULL;
3306 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
3307 if (!def_vinfo)
3308 return NULL;
3310 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
3311 if (*type_out == NULL_TREE)
3312 return NULL;
3314 tree def = NULL_TREE;
3315 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
3316 if (def_stmt && gimple_assign_cast_p (def_stmt))
3318 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3319 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
3320 && TYPE_PRECISION (TREE_TYPE (rhs1))
3321 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
3323 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
3324 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
3325 def = rhs1;
3326 else
3328 tree mask
3329 = build_low_bits_mask (TREE_TYPE (rhs1),
3330 TYPE_PRECISION (TREE_TYPE (oprnd1)));
3331 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3332 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
3333 tree vecstype = get_vectype_for_scalar_type (vinfo,
3334 TREE_TYPE (rhs1));
3335 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3340 if (def == NULL_TREE)
3342 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3343 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3344 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3347 /* Pattern detected. */
3348 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
3350 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3351 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3352 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
3354 return pattern_stmt;
3357 /* Return true iff the target has a vector optab implementing the operation
3358 CODE on type VECTYPE. */
3360 static bool
3361 target_has_vecop_for_code (tree_code code, tree vectype)
3363 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3364 return voptab
3365 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3368 /* Verify that the target has optabs of VECTYPE to perform all the steps
3369 needed by the multiplication-by-immediate synthesis algorithm described by
3370 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
3371 present. Return true iff the target supports all the steps. */
3373 static bool
3374 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3375 tree vectype, bool synth_shift_p)
3377 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3378 return false;
3380 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3381 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3383 if (var == negate_variant
3384 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3385 return false;
3387 /* If we must synthesize shifts with additions make sure that vector
3388 addition is available. */
3389 if ((var == add_variant || synth_shift_p) && !supports_vplus)
3390 return false;
3392 for (int i = 1; i < alg->ops; i++)
3394 switch (alg->op[i])
3396 case alg_shift:
3397 break;
3398 case alg_add_t_m2:
3399 case alg_add_t2_m:
3400 case alg_add_factor:
3401 if (!supports_vplus)
3402 return false;
3403 break;
3404 case alg_sub_t_m2:
3405 case alg_sub_t2_m:
3406 case alg_sub_factor:
3407 if (!supports_vminus)
3408 return false;
3409 break;
3410 case alg_unknown:
3411 case alg_m:
3412 case alg_zero:
3413 case alg_impossible:
3414 return false;
3415 default:
3416 gcc_unreachable ();
3420 return true;
3423 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3424 putting the final result in DEST. Append all statements but the last to
3425 STMT_INFO's pattern definition sequence. Return the last statement. */
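/* For example (illustrative), a shift by 3 is synthesized as three
   successive doublings:
     t1   = op + op;
     t2   = t1 + t1;
     dest = t2 + t2;  */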
3427 static gimple *
3428 synth_lshift_by_additions (vec_info *vinfo,
3429 tree dest, tree op, HOST_WIDE_INT amnt,
3430 stmt_vec_info stmt_info)
3432 HOST_WIDE_INT i;
3433 tree itype = TREE_TYPE (op);
3434 tree prev_res = op;
3435 gcc_assert (amnt >= 0);
3436 for (i = 0; i < amnt; i++)
3438 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3439 : dest;
3440 gimple *stmt
3441 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3442 prev_res = tmp_var;
3443 if (i < amnt - 1)
3444 append_pattern_def_seq (vinfo, stmt_info, stmt);
3445 else
3446 return stmt;
3448 gcc_unreachable ();
3449 return NULL;
3452 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3453 CODE to operands OP1 and OP2, creating a new temporary SSA var in
3454 the process if necessary. Append the resulting assignment statements
3455 to the sequence in STMT_VINFO. Return the SSA variable that holds the
3456 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3457 left shifts using additions. */
3459 static tree
3460 apply_binop_and_append_stmt (vec_info *vinfo,
3461 tree_code code, tree op1, tree op2,
3462 stmt_vec_info stmt_vinfo, bool synth_shift_p)
3464 if (integer_zerop (op2)
3465 && (code == LSHIFT_EXPR
3466 || code == PLUS_EXPR))
3468 gcc_assert (TREE_CODE (op1) == SSA_NAME);
3469 return op1;
3472 gimple *stmt;
3473 tree itype = TREE_TYPE (op1);
3474 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3476 if (code == LSHIFT_EXPR
3477 && synth_shift_p)
3479 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3480 TREE_INT_CST_LOW (op2), stmt_vinfo);
3481 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3482 return tmp_var;
3485 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3486 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3487 return tmp_var;
3490 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3491 and simple arithmetic operations to be vectorized. Record the statements
3492 produced in STMT_VINFO and return the last statement in the sequence or
3493 NULL if it's not possible to synthesize such a multiplication.
3494 This function mirrors the behavior of expand_mult_const in expmed.cc but
3495 works on tree-ssa form. */
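/* Illustrative example (the actual steps are chosen by choose_mult_variant):
   a multiplication by 9 may be synthesized as
     t1  = op << 3;
     res = t1 + op;
   i.e. an alg_add_t_m2 step applied to the initial accumulator.  */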
3497 static gimple *
3498 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3499 stmt_vec_info stmt_vinfo)
3501 tree itype = TREE_TYPE (op);
3502 machine_mode mode = TYPE_MODE (itype);
3503 struct algorithm alg;
3504 mult_variant variant;
3505 if (!tree_fits_shwi_p (val))
3506 return NULL;
3508 /* Multiplication synthesis by shifts, adds and subs can introduce
3509 signed overflow where the original operation didn't. Perform the
3510 operations on an unsigned type and cast back to avoid this.
3511 In the future we may want to relax this for synthesis algorithms
3512 that we can prove do not cause unexpected overflow. */
3513 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3515 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3516 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3517 if (!vectype)
3518 return NULL;
3520 /* Targets that don't support vector shifts but support vector additions
3521 can synthesize shifts that way. */
3522 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3524 HOST_WIDE_INT hwval = tree_to_shwi (val);
3525 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3526 The vectorizer's benefit analysis will decide whether it's beneficial
3527 to do this. */
3528 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3529 ? TYPE_MODE (vectype) : mode,
3530 hwval, &alg, &variant, MAX_COST);
3531 if (!possible)
3532 return NULL;
3534 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3535 return NULL;
3537 tree accumulator;
3539 /* STMT tracks the most recently built statement; the last one built below becomes the pattern root. */
3540 gimple *stmt = NULL;
3542 if (cast_to_unsigned_p)
3544 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3545 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3546 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3547 op = tmp_op;
3550 if (alg.op[0] == alg_zero)
3551 accumulator = build_int_cst (multtype, 0);
3552 else
3553 accumulator = op;
3555 bool needs_fixup = (variant == negate_variant)
3556 || (variant == add_variant);
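  /* For negate_variant and add_variant a fixup statement is emitted after
     the loop, so every statement generated below must be appended to the
     pattern definition sequence instead of becoming the pattern root.  */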
3558 for (int i = 1; i < alg.ops; i++)
3560 tree shft_log = build_int_cst (multtype, alg.log[i]);
3561 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3562 tree tmp_var = NULL_TREE;
3564 switch (alg.op[i])
3566 case alg_shift:
3567 if (synth_shift_p)
3568 stmt
3569 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3570 alg.log[i], stmt_vinfo);
3571 else
3572 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3573 shft_log);
3574 break;
3575 case alg_add_t_m2:
3576 tmp_var
3577 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3578 stmt_vinfo, synth_shift_p);
3579 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3580 tmp_var);
3581 break;
3582 case alg_sub_t_m2:
3583 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3584 shft_log, stmt_vinfo,
3585 synth_shift_p);
3586 /* In some algorithms the first step involves zeroing the
3587 accumulator. If subtracting from such an accumulator
3588 just emit the negation directly. */
3589 if (integer_zerop (accumulator))
3590 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3591 else
3592 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3593 tmp_var);
3594 break;
3595 case alg_add_t2_m:
3596 tmp_var
3597 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3598 shft_log, stmt_vinfo, synth_shift_p);
3599 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3600 break;
3601 case alg_sub_t2_m:
3602 tmp_var
3603 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3604 shft_log, stmt_vinfo, synth_shift_p);
3605 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3606 break;
3607 case alg_add_factor:
3608 tmp_var
3609 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3610 shft_log, stmt_vinfo, synth_shift_p);
3611 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3612 tmp_var);
3613 break;
3614 case alg_sub_factor:
3615 tmp_var
3616 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3617 shft_log, stmt_vinfo, synth_shift_p);
3618 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3619 accumulator);
3620 break;
3621 default:
3622 gcc_unreachable ();
3624 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3625 but rather return it directly. */
3627 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3628 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3629 accumulator = accum_tmp;
3631 if (variant == negate_variant)
3633 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3634 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3635 accumulator = accum_tmp;
3636 if (cast_to_unsigned_p)
3637 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3639 else if (variant == add_variant)
3641 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3642 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3643 accumulator = accum_tmp;
3644 if (cast_to_unsigned_p)
3645 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3647 /* Convert back to the signed type if needed. */
3648 if (cast_to_unsigned_p)
3650 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3651 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3654 return stmt;
3657 /* Detect multiplication by constant and convert it into a sequence of
3658 shifts and additions, subtractions, negations. We reuse the
3659 choose_mult_variant algorithms from expmed.cc
3661 Input/Output:
3663 STMT_VINFO: The stmt from which the pattern search begins,
3664 i.e. the mult stmt.
3666 Output:
3668 * TYPE_OUT: The type of the output of this pattern.
3670 * Return value: A new stmt that will be used to replace
3671 the multiplication. */
3673 static gimple *
3674 vect_recog_mult_pattern (vec_info *vinfo,
3675 stmt_vec_info stmt_vinfo, tree *type_out)
3677 gimple *last_stmt = stmt_vinfo->stmt;
3678 tree oprnd0, oprnd1, vectype, itype;
3679 gimple *pattern_stmt;
3681 if (!is_gimple_assign (last_stmt))
3682 return NULL;
3684 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3685 return NULL;
3687 oprnd0 = gimple_assign_rhs1 (last_stmt);
3688 oprnd1 = gimple_assign_rhs2 (last_stmt);
3689 itype = TREE_TYPE (oprnd0);
3691 if (TREE_CODE (oprnd0) != SSA_NAME
3692 || TREE_CODE (oprnd1) != INTEGER_CST
3693 || !INTEGRAL_TYPE_P (itype)
3694 || !type_has_mode_precision_p (itype))
3695 return NULL;
3697 vectype = get_vectype_for_scalar_type (vinfo, itype);
3698 if (vectype == NULL_TREE)
3699 return NULL;
3701 /* If the target can handle vectorized multiplication natively,
3702 don't attempt to optimize this. */
3703 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3704 if (mul_optab != unknown_optab)
3706 machine_mode vec_mode = TYPE_MODE (vectype);
3707 int icode = (int) optab_handler (mul_optab, vec_mode);
3708 if (icode != CODE_FOR_nothing)
3709 return NULL;
3712 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3713 oprnd0, oprnd1, stmt_vinfo);
3714 if (!pattern_stmt)
3715 return NULL;
3717 /* Pattern detected. */
3718 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3720 *type_out = vectype;
3722 return pattern_stmt;
3725 /* Detect a signed division by a constant that wouldn't be
3726 otherwise vectorized:
3728 type a_t, b_t;
3730 S1 a_t = b_t / N;
3732 where type 'type' is an integral type and N is a constant.
3734 Similarly handle modulo by a constant:
3736 S4 a_t = b_t % N;
3738 Input/Output:
3740 * STMT_VINFO: The stmt from which the pattern search begins,
3741 i.e. the division stmt. If N is a power of two constant
3742 and the type is signed, S1 is replaced by:
3743 S3 y_t = b_t < 0 ? N - 1 : 0;
3744 S2 x_t = b_t + y_t;
3745 S1' a_t = x_t >> log2 (N);
3747 If N is a power of two constant and the type is signed, S4 is
3748 replaced by (where *_T temporaries have unsigned type):
3749 S9 y_T = b_t < 0 ? -1U : 0U;
3750 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3751 S7 z_t = (type) z_T;
3752 S6 w_t = b_t + z_t;
3753 S5 x_t = w_t & (N - 1);
3754 S4' a_t = x_t - z_t;
3756 Output:
3758 * TYPE_OUT: The type of the output of this pattern.
3760 * Return value: A new stmt that will be used to replace the division
3761 S1 or modulo S4 stmt. */
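/* For instance (illustrative), with N = 4 and a signed type
     S1  a_t = b_t / 4;
   becomes
     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;
   which rounds towards zero as TRUNC_DIV_EXPR requires.  */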
3763 static gimple *
3764 vect_recog_divmod_pattern (vec_info *vinfo,
3765 stmt_vec_info stmt_vinfo, tree *type_out)
3767 gimple *last_stmt = stmt_vinfo->stmt;
3768 tree oprnd0, oprnd1, vectype, itype, cond;
3769 gimple *pattern_stmt, *def_stmt;
3770 enum tree_code rhs_code;
3771 optab optab;
3772 tree q, cst;
3773 int dummy_int, prec;
3775 if (!is_gimple_assign (last_stmt))
3776 return NULL;
3778 rhs_code = gimple_assign_rhs_code (last_stmt);
3779 switch (rhs_code)
3781 case TRUNC_DIV_EXPR:
3782 case EXACT_DIV_EXPR:
3783 case TRUNC_MOD_EXPR:
3784 break;
3785 default:
3786 return NULL;
3789 oprnd0 = gimple_assign_rhs1 (last_stmt);
3790 oprnd1 = gimple_assign_rhs2 (last_stmt);
3791 itype = TREE_TYPE (oprnd0);
3792 if (TREE_CODE (oprnd0) != SSA_NAME
3793 || TREE_CODE (oprnd1) != INTEGER_CST
3794 || TREE_CODE (itype) != INTEGER_TYPE
3795 || !type_has_mode_precision_p (itype))
3796 return NULL;
3798 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3799 vectype = get_vectype_for_scalar_type (vinfo, itype);
3800 if (vectype == NULL_TREE)
3801 return NULL;
3803 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3805 /* If the target can handle vectorized division or modulo natively,
3806 don't attempt to optimize this, since native division is likely
3807 to give smaller code. */
3808 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3809 if (optab != unknown_optab)
3811 machine_mode vec_mode = TYPE_MODE (vectype);
3812 int icode = (int) optab_handler (optab, vec_mode);
3813 if (icode != CODE_FOR_nothing)
3814 return NULL;
3818 prec = TYPE_PRECISION (itype);
3819 if (integer_pow2p (oprnd1))
3821 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3822 return NULL;
3824 /* Pattern detected. */
3825 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3827 *type_out = vectype;
3829 /* Check if the target supports this internal function. */
3830 internal_fn ifn = IFN_DIV_POW2;
3831 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3833 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3835 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3836 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3837 gimple_call_set_lhs (div_stmt, var_div);
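	  /* For modulo, compute the quotient via IFN_DIV_POW2 and then
	     reconstruct the remainder as oprnd0 - (quotient << log2 (N)).  */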
3839 if (rhs_code == TRUNC_MOD_EXPR)
3841 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3842 def_stmt
3843 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3844 LSHIFT_EXPR, var_div, shift);
3845 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3846 pattern_stmt
3847 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3848 MINUS_EXPR, oprnd0,
3849 gimple_assign_lhs (def_stmt));
3851 else
3852 pattern_stmt = div_stmt;
3853 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3855 return pattern_stmt;
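      /* Otherwise open-code the expansion from the comment above: bias
	 negative operands by N - 1 before the arithmetic shift for the
	 division case, or add and later subtract a sign-derived mask around
	 the bitwise AND for the modulo case.  */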
3858 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3859 build_int_cst (itype, 0));
3860 if (rhs_code == TRUNC_DIV_EXPR
3861 || rhs_code == EXACT_DIV_EXPR)
3863 tree var = vect_recog_temp_ssa_var (itype, NULL);
3864 tree shift;
3865 def_stmt
3866 = gimple_build_assign (var, COND_EXPR, cond,
3867 fold_build2 (MINUS_EXPR, itype, oprnd1,
3868 build_int_cst (itype, 1)),
3869 build_int_cst (itype, 0));
3870 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3871 var = vect_recog_temp_ssa_var (itype, NULL);
3872 def_stmt
3873 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3874 gimple_assign_lhs (def_stmt));
3875 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3877 shift = build_int_cst (itype, tree_log2 (oprnd1));
3878 pattern_stmt
3879 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3880 RSHIFT_EXPR, var, shift);
3882 else
3884 tree signmask;
3885 if (compare_tree_int (oprnd1, 2) == 0)
3887 signmask = vect_recog_temp_ssa_var (itype, NULL);
3888 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3889 build_int_cst (itype, 1),
3890 build_int_cst (itype, 0));
3891 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3893 else
3895 tree utype
3896 = build_nonstandard_integer_type (prec, 1);
3897 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3898 tree shift
3899 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3900 - tree_log2 (oprnd1));
3901 tree var = vect_recog_temp_ssa_var (utype, NULL);
3903 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3904 build_int_cst (utype, -1),
3905 build_int_cst (utype, 0));
3906 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3907 var = vect_recog_temp_ssa_var (utype, NULL);
3908 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3909 gimple_assign_lhs (def_stmt),
3910 shift);
3911 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3912 signmask = vect_recog_temp_ssa_var (itype, NULL);
3913 def_stmt
3914 = gimple_build_assign (signmask, NOP_EXPR, var);
3915 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3917 def_stmt
3918 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3919 PLUS_EXPR, oprnd0, signmask);
3920 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3921 def_stmt
3922 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3923 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3924 fold_build2 (MINUS_EXPR, itype, oprnd1,
3925 build_int_cst (itype, 1)));
3926 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3928 pattern_stmt
3929 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3930 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3931 signmask);
3934 return pattern_stmt;
3936 else if ((cst = uniform_integer_cst_p (oprnd1))
3937 && targetm.vectorize.can_special_div_by_const (rhs_code, vectype,
3938 wi::to_wide (cst),
3939 NULL, NULL_RTX,
3940 NULL_RTX))
3942 return NULL;
3945 if (prec > HOST_BITS_PER_WIDE_INT
3946 || integer_zerop (oprnd1))
3947 return NULL;
3949 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3950 return NULL;
3952 if (TYPE_UNSIGNED (itype))
3954 unsigned HOST_WIDE_INT mh, ml;
3955 int pre_shift, post_shift;
3956 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3957 & GET_MODE_MASK (itype_mode));
3958 tree t1, t2, t3, t4;
3960 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3961 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3962 return NULL;
3964 /* Find a suitable multiplier and right shift count
3965 instead of multiplying with D. */
3966 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3968 /* If the suggested multiplier is more than PREC bits, we can do better
3969 for even divisors, using an initial right shift. */
3970 if (mh != 0 && (d & 1) == 0)
3972 pre_shift = ctz_or_zero (d);
3973 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3974 &ml, &post_shift, &dummy_int);
3975 gcc_assert (!mh);
3977 else
3978 pre_shift = 0;
3980 if (mh != 0)
3982 if (post_shift - 1 >= prec)
3983 return NULL;
3985 /* t1 = oprnd0 h* ml;
3986 t2 = oprnd0 - t1;
3987 t3 = t2 >> 1;
3988 t4 = t1 + t3;
3989 q = t4 >> (post_shift - 1); */
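	  /* Illustrative example (the constants come from choose_multiplier):
	     a 32-bit unsigned division by 7 is emitted roughly as
	       t1 = oprnd0 h* 0x24924925;
	       t2 = oprnd0 - t1;
	       t3 = t2 >> 1;
	       t4 = t1 + t3;
	       q  = t4 >> 2;  */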
3990 t1 = vect_recog_temp_ssa_var (itype, NULL);
3991 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3992 build_int_cst (itype, ml));
3993 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3995 t2 = vect_recog_temp_ssa_var (itype, NULL);
3996 def_stmt
3997 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3998 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4000 t3 = vect_recog_temp_ssa_var (itype, NULL);
4001 def_stmt
4002 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4003 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4005 t4 = vect_recog_temp_ssa_var (itype, NULL);
4006 def_stmt
4007 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4009 if (post_shift != 1)
4011 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4013 q = vect_recog_temp_ssa_var (itype, NULL);
4014 pattern_stmt
4015 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4016 build_int_cst (itype, post_shift - 1));
4018 else
4020 q = t4;
4021 pattern_stmt = def_stmt;
4024 else
4026 if (pre_shift >= prec || post_shift >= prec)
4027 return NULL;
4029 /* t1 = oprnd0 >> pre_shift;
4030 t2 = t1 h* ml;
4031 q = t2 >> post_shift; */
4032 if (pre_shift)
4034 t1 = vect_recog_temp_ssa_var (itype, NULL);
4035 def_stmt
4036 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4037 build_int_cst (NULL, pre_shift));
4038 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4040 else
4041 t1 = oprnd0;
4043 t2 = vect_recog_temp_ssa_var (itype, NULL);
4044 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4045 build_int_cst (itype, ml));
4047 if (post_shift)
4049 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4051 q = vect_recog_temp_ssa_var (itype, NULL);
4052 def_stmt
4053 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4054 build_int_cst (itype, post_shift));
4056 else
4057 q = t2;
4059 pattern_stmt = def_stmt;
4062 else
4064 unsigned HOST_WIDE_INT ml;
4065 int post_shift;
4066 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4067 unsigned HOST_WIDE_INT abs_d;
4068 bool add = false;
4069 tree t1, t2, t3, t4;
4071 /* Give up for -1. */
4072 if (d == -1)
4073 return NULL;
4075 /* Since d might be INT_MIN, we have to cast to
4076 unsigned HOST_WIDE_INT before negating to avoid
4077 undefined signed overflow. */
4078 abs_d = (d >= 0
4079 ? (unsigned HOST_WIDE_INT) d
4080 : - (unsigned HOST_WIDE_INT) d);
4082 /* n rem d = n rem -d */
4083 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4085 d = abs_d;
4086 oprnd1 = build_int_cst (itype, abs_d);
4088 if (HOST_BITS_PER_WIDE_INT >= prec
4089 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4090 /* This case is not handled correctly below. */
4091 return NULL;
4093 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4094 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4096 add = true;
4097 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4099 if (post_shift >= prec)
4100 return NULL;
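      /* Illustrative example (the constants come from choose_multiplier):
	 a 32-bit signed division by 3 typically uses ml = 0x55555556 with
	 post_shift = 0, and the sign correction below then yields
	 q = t3 - (oprnd0 >> 31).  */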
4102 /* t1 = oprnd0 h* ml; */
4103 t1 = vect_recog_temp_ssa_var (itype, NULL);
4104 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4105 build_int_cst (itype, ml));
4107 if (add)
4109 /* t2 = t1 + oprnd0; */
4110 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4111 t2 = vect_recog_temp_ssa_var (itype, NULL);
4112 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4114 else
4115 t2 = t1;
4117 if (post_shift)
4119 /* t3 = t2 >> post_shift; */
4120 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4121 t3 = vect_recog_temp_ssa_var (itype, NULL);
4122 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4123 build_int_cst (itype, post_shift));
4125 else
4126 t3 = t2;
4128 int msb = 1;
4129 value_range r;
4130 get_range_query (cfun)->range_of_expr (r, oprnd0);
4131 if (r.kind () == VR_RANGE)
4133 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4134 msb = 0;
4135 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4136 msb = -1;
4139 if (msb == 0 && d >= 0)
4141 /* q = t3; */
4142 q = t3;
4143 pattern_stmt = def_stmt;
4145 else
4147 /* t4 = oprnd0 >> (prec - 1);
4148 or if we know from VRP that oprnd0 >= 0
4149 t4 = 0;
4150 or if we know from VRP that oprnd0 < 0
4151 t4 = -1; */
4152 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4153 t4 = vect_recog_temp_ssa_var (itype, NULL);
4154 if (msb != 1)
4155 def_stmt = gimple_build_assign (t4, INTEGER_CST,
4156 build_int_cst (itype, msb));
4157 else
4158 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
4159 build_int_cst (itype, prec - 1));
4160 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4162 /* q = t3 - t4; or q = t4 - t3; */
4163 q = vect_recog_temp_ssa_var (itype, NULL);
4164 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
4165 d < 0 ? t3 : t4);
4169 if (rhs_code == TRUNC_MOD_EXPR)
4171 tree r, t1;
4173 /* We divided. Now finish by:
4174 t1 = q * oprnd1;
4175 r = oprnd0 - t1; */
4176 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
4178 t1 = vect_recog_temp_ssa_var (itype, NULL);
4179 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
4180 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4182 r = vect_recog_temp_ssa_var (itype, NULL);
4183 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
4186 /* Pattern detected. */
4187 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4189 *type_out = vectype;
4190 return pattern_stmt;
4193 /* Function vect_recog_mixed_size_cond_pattern
4195 Try to find the following pattern:
4197 type x_t, y_t;
4198 TYPE a_T, b_T, c_T;
4199 loop:
4200 S1 a_T = x_t CMP y_t ? b_T : c_T;
4202 where type 'TYPE' is an integral type which has different size
4203 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
4204 than 'type', the constants need to fit into an integer type
4205 with the same width as 'type') or results of conversion from 'type'.
4207 Input:
4209 * STMT_VINFO: The stmt from which the pattern search begins.
4211 Output:
4213 * TYPE_OUT: The type of the output of this pattern.
4215 * Return value: A new stmt that will be used to replace the pattern.
4216 Additionally a def_stmt is added.
4218 a_it = x_t CMP y_t ? b_it : c_it;
4219 a_T = (TYPE) a_it; */
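/* E.g. (illustrative) for
     short x_t, y_t;
     int a_T = x_t < y_t ? 3 : 5;
   the pattern creates
     a_it = x_t < y_t ? 3 : 5;
     a_T  = (int) a_it;
   where a_it has the 16-bit intermediate type, so the whole COND_EXPR is
   evaluated in the narrower type.  */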
4221 static gimple *
4222 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
4223 stmt_vec_info stmt_vinfo, tree *type_out)
4225 gimple *last_stmt = stmt_vinfo->stmt;
4226 tree cond_expr, then_clause, else_clause;
4227 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
4228 gimple *pattern_stmt, *def_stmt;
4229 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
4230 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
4231 bool promotion;
4232 tree comp_scalar_type;
4234 if (!is_gimple_assign (last_stmt)
4235 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
4236 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
4237 return NULL;
4239 cond_expr = gimple_assign_rhs1 (last_stmt);
4240 then_clause = gimple_assign_rhs2 (last_stmt);
4241 else_clause = gimple_assign_rhs3 (last_stmt);
4243 if (!COMPARISON_CLASS_P (cond_expr))
4244 return NULL;
4246 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
4247 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
4248 if (comp_vectype == NULL_TREE)
4249 return NULL;
4251 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
4252 if (types_compatible_p (type, comp_scalar_type)
4253 || ((TREE_CODE (then_clause) != INTEGER_CST
4254 || TREE_CODE (else_clause) != INTEGER_CST)
4255 && !INTEGRAL_TYPE_P (comp_scalar_type))
4256 || !INTEGRAL_TYPE_P (type))
4257 return NULL;
4259 if ((TREE_CODE (then_clause) != INTEGER_CST
4260 && !type_conversion_p (vinfo, then_clause, false,
4261 &orig_type0, &def_stmt0, &promotion))
4262 || (TREE_CODE (else_clause) != INTEGER_CST
4263 && !type_conversion_p (vinfo, else_clause, false,
4264 &orig_type1, &def_stmt1, &promotion)))
4265 return NULL;
4267 if (orig_type0 && orig_type1
4268 && !types_compatible_p (orig_type0, orig_type1))
4269 return NULL;
4271 if (orig_type0)
4273 if (!types_compatible_p (orig_type0, comp_scalar_type))
4274 return NULL;
4275 then_clause = gimple_assign_rhs1 (def_stmt0);
4276 itype = orig_type0;
4279 if (orig_type1)
4281 if (!types_compatible_p (orig_type1, comp_scalar_type))
4282 return NULL;
4283 else_clause = gimple_assign_rhs1 (def_stmt1);
4284 itype = orig_type1;
4288 HOST_WIDE_INT cmp_mode_size
4289 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
4291 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
4292 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
4293 return NULL;
4295 vectype = get_vectype_for_scalar_type (vinfo, type);
4296 if (vectype == NULL_TREE)
4297 return NULL;
4299 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
4300 return NULL;
4302 if (itype == NULL_TREE)
4303 itype = build_nonstandard_integer_type (cmp_mode_size,
4304 TYPE_UNSIGNED (type));
4306 if (itype == NULL_TREE
4307 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
4308 return NULL;
4310 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4311 if (vecitype == NULL_TREE)
4312 return NULL;
4314 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
4315 return NULL;
4317 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
4319 if ((TREE_CODE (then_clause) == INTEGER_CST
4320 && !int_fits_type_p (then_clause, itype))
4321 || (TREE_CODE (else_clause) == INTEGER_CST
4322 && !int_fits_type_p (else_clause, itype)))
4323 return NULL;
4326 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4327 COND_EXPR, unshare_expr (cond_expr),
4328 fold_convert (itype, then_clause),
4329 fold_convert (itype, else_clause));
4330 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4331 NOP_EXPR, gimple_assign_lhs (def_stmt));
4333 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
4334 *type_out = vectype;
4336 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
4338 return pattern_stmt;
4342 /* Helper function of vect_recog_bool_pattern. Called recursively, return
4343 true if bool VAR can and should be optimized that way. Assume it shouldn't
4344 in case it's a result of a comparison which can be directly vectorized into
4345 a vector comparison. Fills in STMTS with all stmts visited during the
4346 walk. */
4348 static bool
4349 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
4351 tree rhs1;
4352 enum tree_code rhs_code;
4354 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4355 if (!def_stmt_info)
4356 return false;
4358 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
4359 if (!def_stmt)
4360 return false;
4362 if (stmts.contains (def_stmt))
4363 return true;
4365 rhs1 = gimple_assign_rhs1 (def_stmt);
4366 rhs_code = gimple_assign_rhs_code (def_stmt);
4367 switch (rhs_code)
4369 case SSA_NAME:
4370 if (! check_bool_pattern (rhs1, vinfo, stmts))
4371 return false;
4372 break;
4374 CASE_CONVERT:
4375 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4376 return false;
4377 if (! check_bool_pattern (rhs1, vinfo, stmts))
4378 return false;
4379 break;
4381 case BIT_NOT_EXPR:
4382 if (! check_bool_pattern (rhs1, vinfo, stmts))
4383 return false;
4384 break;
4386 case BIT_AND_EXPR:
4387 case BIT_IOR_EXPR:
4388 case BIT_XOR_EXPR:
4389 if (! check_bool_pattern (rhs1, vinfo, stmts)
4390 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4391 return false;
4392 break;
4394 default:
4395 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4397 tree vecitype, comp_vectype;
4399 /* If the comparison can throw, then is_gimple_condexpr will be
4400 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4401 if (stmt_could_throw_p (cfun, def_stmt))
4402 return false;
4404 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4405 if (comp_vectype == NULL_TREE)
4406 return false;
4408 tree mask_type = get_mask_type_for_scalar_type (vinfo,
4409 TREE_TYPE (rhs1));
4410 if (mask_type
4411 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4412 return false;
4414 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4416 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4417 tree itype
4418 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4419 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4420 if (vecitype == NULL_TREE)
4421 return false;
4423 else
4424 vecitype = comp_vectype;
4425 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4426 return false;
4428 else
4429 return false;
4430 break;
4433 bool res = stmts.add (def_stmt);
4434 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4435 gcc_assert (!res);
4437 return true;
4441 /* Helper function of adjust_bool_pattern. Cast VAR to TYPE, appending
4442 the cast statement to STMT_INFO's pattern definition sequence, and
4443 return the result of the cast. */
4445 static tree
4446 adjust_bool_pattern_cast (vec_info *vinfo,
4447 tree type, tree var, stmt_vec_info stmt_info)
4449 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4450 NOP_EXPR, var);
4451 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4452 get_vectype_for_scalar_type (vinfo, type));
4453 return gimple_assign_lhs (cast_stmt);
4456 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4457 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4458 type, OUT_TYPE is the desired final integer type of the whole pattern.
4459 STMT_INFO is the info of the pattern root and is where pattern stmts should
4460 be associated with. DEFS is a map of pattern defs. */
4462 static void
4463 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4464 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4466 gimple *stmt = SSA_NAME_DEF_STMT (var);
4467 enum tree_code rhs_code, def_rhs_code;
4468 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4469 location_t loc;
4470 gimple *pattern_stmt, *def_stmt;
4471 tree trueval = NULL_TREE;
4473 rhs1 = gimple_assign_rhs1 (stmt);
4474 rhs2 = gimple_assign_rhs2 (stmt);
4475 rhs_code = gimple_assign_rhs_code (stmt);
4476 loc = gimple_location (stmt);
4477 switch (rhs_code)
4479 case SSA_NAME:
4480 CASE_CONVERT:
4481 irhs1 = *defs.get (rhs1);
4482 itype = TREE_TYPE (irhs1);
4483 pattern_stmt
4484 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4485 SSA_NAME, irhs1);
4486 break;
4488 case BIT_NOT_EXPR:
4489 irhs1 = *defs.get (rhs1);
4490 itype = TREE_TYPE (irhs1);
4491 pattern_stmt
4492 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4493 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4494 break;
4496 case BIT_AND_EXPR:
4497 /* Try to optimize x = y & (a < b ? 1 : 0); into
4498 x = (a < b ? y : 0);
4500 E.g. for:
4501 bool a_b, b_b, c_b;
4502 TYPE d_T;
4504 S1 a_b = x1 CMP1 y1;
4505 S2 b_b = x2 CMP2 y2;
4506 S3 c_b = a_b & b_b;
4507 S4 d_T = (TYPE) c_b;
4509 we would normally emit:
4511 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4512 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4513 S3' c_T = a_T & b_T;
4514 S4' d_T = c_T;
4516 but we can save one stmt by using the
4517 result of one of the COND_EXPRs in the other COND_EXPR and leave
4518 BIT_AND_EXPR stmt out:
4520 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4521 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4522 S4' d_T = c_T;
4524 At least when VEC_COND_EXPR is implemented using masks
4525 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
4526 computes the comparison mask and ANDs it, in one case with an
4527 all-ones vector, in the other case with a vector register.
4528 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4529 often more expensive. */
4530 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4531 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4532 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4534 irhs1 = *defs.get (rhs1);
4535 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4536 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4537 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4539 rhs_code = def_rhs_code;
4540 rhs1 = def_rhs1;
4541 rhs2 = gimple_assign_rhs2 (def_stmt);
4542 trueval = irhs1;
4543 goto do_compare;
4545 else
4546 irhs2 = *defs.get (rhs2);
4547 goto and_ior_xor;
4549 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4550 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4551 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4553 irhs2 = *defs.get (rhs2);
4554 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4555 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4556 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4558 rhs_code = def_rhs_code;
4559 rhs1 = def_rhs1;
4560 rhs2 = gimple_assign_rhs2 (def_stmt);
4561 trueval = irhs2;
4562 goto do_compare;
4564 else
4565 irhs1 = *defs.get (rhs1);
4566 goto and_ior_xor;
4568 /* FALLTHRU */
4569 case BIT_IOR_EXPR:
4570 case BIT_XOR_EXPR:
4571 irhs1 = *defs.get (rhs1);
4572 irhs2 = *defs.get (rhs2);
4573 and_ior_xor:
4574 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4575 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4577 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4578 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4579 int out_prec = TYPE_PRECISION (out_type);
4580 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4581 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4582 stmt_info);
4583 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4584 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4585 stmt_info);
4586 else
4588 irhs1 = adjust_bool_pattern_cast (vinfo,
4589 out_type, irhs1, stmt_info);
4590 irhs2 = adjust_bool_pattern_cast (vinfo,
4591 out_type, irhs2, stmt_info);
4594 itype = TREE_TYPE (irhs1);
4595 pattern_stmt
4596 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4597 rhs_code, irhs1, irhs2);
4598 break;
4600 default:
4601 do_compare:
4602 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4603 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4604 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4605 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4606 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4608 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4609 itype
4610 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4612 else
4613 itype = TREE_TYPE (rhs1);
4614 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4615 if (trueval == NULL_TREE)
4616 trueval = build_int_cst (itype, 1);
4617 else
4618 gcc_checking_assert (useless_type_conversion_p (itype,
4619 TREE_TYPE (trueval)));
4620 pattern_stmt
4621 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4622 COND_EXPR, cond_expr, trueval,
4623 build_int_cst (itype, 0));
4624 break;
4627 gimple_set_location (pattern_stmt, loc);
4628 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4629 get_vectype_for_scalar_type (vinfo, itype));
4630 defs.put (var, gimple_assign_lhs (pattern_stmt));
4633 /* Comparison function to qsort a vector of gimple stmts by UID. */
4635 static int
4636 sort_after_uid (const void *p1, const void *p2)
4638 const gimple *stmt1 = *(const gimple * const *)p1;
4639 const gimple *stmt2 = *(const gimple * const *)p2;
4640 return gimple_uid (stmt1) - gimple_uid (stmt2);
4643 /* Create pattern stmts for all stmts participating in the bool pattern
4644 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4645 OUT_TYPE. Return the def of the pattern root. */
4647 static tree
4648 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4649 tree out_type, stmt_vec_info stmt_info)
4651 /* Gather original stmts in the bool pattern in their order of appearance
4652 in the IL. */
4653 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4654 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4655 i != bool_stmt_set.end (); ++i)
4656 bool_stmts.quick_push (*i);
4657 bool_stmts.qsort (sort_after_uid);
4659 /* Now process them in that order, producing pattern stmts. */
4660 hash_map <tree, tree> defs;
4661 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4662 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4663 out_type, stmt_info, defs);
4665 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4666 gimple *pattern_stmt
4667 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4668 return gimple_assign_lhs (pattern_stmt);
4671 /* Return the proper type for converting bool VAR into
4672 an integer value or NULL_TREE if no such type exists.
4673 The type is chosen so that the converted value has the
4674 same number of elements as VAR's vector type. */
4676 static tree
4677 integer_type_for_mask (tree var, vec_info *vinfo)
4679 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4680 return NULL_TREE;
4682 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4683 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4684 return NULL_TREE;
4686 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4689 /* Function vect_recog_bool_pattern
4691 Try to find pattern like following:
4693 bool a_b, b_b, c_b, d_b, e_b;
4694 TYPE f_T;
4695 loop:
4696 S1 a_b = x1 CMP1 y1;
4697 S2 b_b = x2 CMP2 y2;
4698 S3 c_b = a_b & b_b;
4699 S4 d_b = x3 CMP3 y3;
4700 S5 e_b = c_b | d_b;
4701 S6 f_T = (TYPE) e_b;
4703 where type 'TYPE' is an integral type. Or a similar pattern
4704 ending in
4706 S6 f_Y = e_b ? r_Y : s_Y;
4708 as results from if-conversion of a complex condition.
4710 Input:
4712 * STMT_VINFO: The stmt at the end from which the pattern
4713 search begins, i.e. cast of a bool to
4714 an integer type.
4716 Output:
4718 * TYPE_OUT: The type of the output of this pattern.
4720 * Return value: A new stmt that will be used to replace the pattern.
4722 Assuming the size of TYPE is the same as the size of all comparisons
4723 (otherwise some casts would be added where needed), for the above
4724 sequence we create the related pattern stmts:
4725 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4726 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4727 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4728 S5' e_T = c_T | d_T;
4729 S6' f_T = e_T;
4731 Instead of the above S3' we could emit:
4732 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4733 S3' c_T = a_T & b_T;
4734 but the above is more efficient. */
4736 static gimple *
4737 vect_recog_bool_pattern (vec_info *vinfo,
4738 stmt_vec_info stmt_vinfo, tree *type_out)
4740 gimple *last_stmt = stmt_vinfo->stmt;
4741 enum tree_code rhs_code;
4742 tree var, lhs, rhs, vectype;
4743 gimple *pattern_stmt;
4745 if (!is_gimple_assign (last_stmt))
4746 return NULL;
4748 var = gimple_assign_rhs1 (last_stmt);
4749 lhs = gimple_assign_lhs (last_stmt);
4750 rhs_code = gimple_assign_rhs_code (last_stmt);
4752 if (rhs_code == VIEW_CONVERT_EXPR)
4753 var = TREE_OPERAND (var, 0);
4755 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4756 return NULL;
4758 hash_set<gimple *> bool_stmts;
4760 if (CONVERT_EXPR_CODE_P (rhs_code)
4761 || rhs_code == VIEW_CONVERT_EXPR)
4763 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4764 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4765 return NULL;
4766 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4768 if (check_bool_pattern (var, vinfo, bool_stmts))
4770 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4771 TREE_TYPE (lhs), stmt_vinfo);
4772 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4773 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4774 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4775 else
4776 pattern_stmt
4777 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4779 else
4781 tree type = integer_type_for_mask (var, vinfo);
4782 tree cst0, cst1, tmp;
4784 if (!type)
4785 return NULL;
4787 /* We may directly use the cond with a narrowed type to avoid
4788 multiple cond exprs with subsequent result packing and instead
4789 perform a single cond with a packed mask. In case of widening
4790 it is better to do the cond first and then extract the
4791 results. */
4792 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4793 type = TREE_TYPE (lhs);
4795 cst0 = build_int_cst (type, 0);
4796 cst1 = build_int_cst (type, 1);
4797 tmp = vect_recog_temp_ssa_var (type, NULL);
4798 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4800 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4802 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4803 append_pattern_def_seq (vinfo, stmt_vinfo,
4804 pattern_stmt, new_vectype);
4806 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4807 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4811 *type_out = vectype;
4812 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4814 return pattern_stmt;
4816 else if (rhs_code == COND_EXPR
4817 && TREE_CODE (var) == SSA_NAME)
4819 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4820 if (vectype == NULL_TREE)
4821 return NULL;
4823 /* Build a scalar type for the boolean result that when
4824 vectorized matches the vector type of the result in
4825 size and number of elements. */
4826 unsigned prec
4827 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4828 TYPE_VECTOR_SUBPARTS (vectype));
4830 tree type
4831 = build_nonstandard_integer_type (prec,
4832 TYPE_UNSIGNED (TREE_TYPE (var)));
4833 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4834 return NULL;
4836 if (check_bool_pattern (var, vinfo, bool_stmts))
4837 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4838 else if (integer_type_for_mask (var, vinfo))
4839 return NULL;
4841 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4842 pattern_stmt
4843 = gimple_build_assign (lhs, COND_EXPR,
4844 build2 (NE_EXPR, boolean_type_node,
4845 var, build_int_cst (TREE_TYPE (var), 0)),
4846 gimple_assign_rhs2 (last_stmt),
4847 gimple_assign_rhs3 (last_stmt));
4848 *type_out = vectype;
4849 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4851 return pattern_stmt;
4853 else if (rhs_code == SSA_NAME
4854 && STMT_VINFO_DATA_REF (stmt_vinfo))
4856 stmt_vec_info pattern_stmt_info;
4857 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4858 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4859 return NULL;
4861 if (check_bool_pattern (var, vinfo, bool_stmts))
4862 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4863 TREE_TYPE (vectype), stmt_vinfo);
4864 else
4866 tree type = integer_type_for_mask (var, vinfo);
4867 tree cst0, cst1, new_vectype;
4869 if (!type)
4870 return NULL;
4872 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4873 type = TREE_TYPE (vectype);
4875 cst0 = build_int_cst (type, 0);
4876 cst1 = build_int_cst (type, 1);
4877 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4879 rhs = vect_recog_temp_ssa_var (type, NULL);
4880 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4881 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4884 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4885 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4887 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4888 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4889 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4890 rhs = rhs2;
4892 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4893 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4894 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4895 *type_out = vectype;
4896 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4898 return pattern_stmt;
4900 else
4901 return NULL;
4905 /* A helper for vect_recog_mask_conversion_pattern. Build a
4906 conversion of MASK to a type suitable for masking VECTYPE.
4907 The built statement gets the required vectype and is appended
4908 to STMT_VINFO's pattern definition sequence.
4910 Return the converted mask. */
4912 static tree
4913 build_mask_conversion (vec_info *vinfo,
4914 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4916 gimple *stmt;
4917 tree masktype, tmp;
4919 masktype = truth_type_for (vectype);
4920 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4921 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4922 append_pattern_def_seq (vinfo, stmt_vinfo,
4923 stmt, masktype, TREE_TYPE (vectype));
4925 return tmp;
4929 /* Function vect_recog_mask_conversion_pattern
4931 Try to find statements which require boolean type
4932 conversion. Additional conversion statements are
4933 added to handle such cases. For example:
4935 bool m_1, m_2, m_3;
4936 int i_4, i_5;
4937 double d_6, d_7;
4938 char c_1, c_2, c_3;
4940 S1 m_1 = i_4 > i_5;
4941 S2 m_2 = d_6 < d_7;
4942 S3 m_3 = m_1 & m_2;
4943 S4 c_1 = m_3 ? c_2 : c_3;
4945 Will be transformed into:
4947 S1 m_1 = i_4 > i_5;
4948 S2 m_2 = d_6 < d_7;
4949 S3'' m_2' = (_Bool[bitsize=32])m_2
4950 S3' m_3' = m_1 & m_2';
4951 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4952 S4' c_1' = m_3'' ? c_2 : c_3; */
4954 static gimple *
4955 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4956 stmt_vec_info stmt_vinfo, tree *type_out)
4958 gimple *last_stmt = stmt_vinfo->stmt;
4959 enum tree_code rhs_code;
4960 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4961 tree vectype1, vectype2;
4962 stmt_vec_info pattern_stmt_info;
4963 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4964 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4966 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4967 if (is_gimple_call (last_stmt)
4968 && gimple_call_internal_p (last_stmt))
4970 gcall *pattern_stmt;
4972 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4973 int mask_argno = internal_fn_mask_index (ifn);
4974 if (mask_argno < 0)
4975 return NULL;
4977 bool store_p = internal_store_fn_p (ifn);
4978 if (store_p)
4980 int rhs_index = internal_fn_stored_value_index (ifn);
4981 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4982 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4984 else
4986 lhs = gimple_call_lhs (last_stmt);
4987 if (!lhs)
4988 return NULL;
4989 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4992 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4993 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4994 if (!mask_arg_type)
4995 return NULL;
4996 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4998 if (!vectype1 || !vectype2
4999 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5000 TYPE_VECTOR_SUBPARTS (vectype2)))
5001 return NULL;
5003 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5005 auto_vec<tree, 8> args;
5006 unsigned int nargs = gimple_call_num_args (last_stmt);
5007 args.safe_grow (nargs, true);
5008 for (unsigned int i = 0; i < nargs; ++i)
5009 args[i] = ((int) i == mask_argno
5010 ? tmp
5011 : gimple_call_arg (last_stmt, i));
5012 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5014 if (!store_p)
5016 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5017 gimple_call_set_lhs (pattern_stmt, lhs);
5019 gimple_call_set_nothrow (pattern_stmt, true);
5021 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5022 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5023 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5025 *type_out = vectype1;
5026 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5028 return pattern_stmt;
5031 if (!is_gimple_assign (last_stmt))
5032 return NULL;
5034 gimple *pattern_stmt;
5035 lhs = gimple_assign_lhs (last_stmt);
5036 rhs1 = gimple_assign_rhs1 (last_stmt);
5037 rhs_code = gimple_assign_rhs_code (last_stmt);
5039 /* Check for cond expression requiring mask conversion. */
5040 if (rhs_code == COND_EXPR)
5042 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5044 if (TREE_CODE (rhs1) == SSA_NAME)
5046 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5047 if (!rhs1_type)
5048 return NULL;
5050 else if (COMPARISON_CLASS_P (rhs1))
5052 /* Check whether we're comparing scalar booleans and (if so)
5053 whether a better mask type exists than the mask associated
5054 with boolean-sized elements. This avoids unnecessary packs
5055 and unpacks if the booleans are set from comparisons of
5056 wider types. E.g. in:
5058 int x1, x2, x3, x4, y1, y2;
5060 bool b1 = (x1 == x2);
5061 bool b2 = (x3 == x4);
5062 ... = b1 == b2 ? y1 : y2;
5064 it is better for b1 and b2 to use the mask type associated
5065 with int elements rather than bool (byte) elements. */
5066 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5067 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5068 if (!rhs1_op0 || !rhs1_op1)
5069 return NULL;
5070 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5071 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
5073 if (!rhs1_op0_type)
5074 rhs1_type = TREE_TYPE (rhs1_op0);
5075 else if (!rhs1_op1_type)
5076 rhs1_type = TREE_TYPE (rhs1_op1);
5077 else if (TYPE_PRECISION (rhs1_op0_type)
5078 != TYPE_PRECISION (rhs1_op1_type))
5080 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5081 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5082 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5083 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5084 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
5086 if (abs (tmp0) > abs (tmp1))
5087 rhs1_type = rhs1_op1_type;
5088 else
5089 rhs1_type = rhs1_op0_type;
5091 else
5092 rhs1_type = build_nonstandard_integer_type
5093 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
5095 else
5096 rhs1_type = rhs1_op0_type;
5098 else
5099 return NULL;
5101 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5103 if (!vectype1 || !vectype2)
5104 return NULL;
5106 /* Continue if a conversion is needed. Also continue if we have
5107 a comparison whose vector type would normally be different from
5108 VECTYPE2 when considered in isolation. In that case we'll
5109 replace the comparison with an SSA name (so that we can record
5110 its vector type) and behave as though the comparison was an SSA
5111 name from the outset. */
5112 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5113 TYPE_VECTOR_SUBPARTS (vectype2))
5114 && !rhs1_op0_type
5115 && !rhs1_op1_type)
5116 return NULL;
5118 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
5119 in place; vectorizable_condition can handle it. This avoids
5120 unnecessary promotion stmts and an increased vectorization factor. */
5121 if (COMPARISON_CLASS_P (rhs1)
5122 && INTEGRAL_TYPE_P (rhs1_type)
5123 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
5124 TYPE_VECTOR_SUBPARTS (vectype2)))
5126 enum vect_def_type dt;
5127 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
5128 && dt == vect_external_def
5129 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
5130 && (dt == vect_external_def
5131 || dt == vect_constant_def))
5133 tree wide_scalar_type = build_nonstandard_integer_type
5134 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
5135 tree vectype3 = get_vectype_for_scalar_type (vinfo,
5136 wide_scalar_type);
5137 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
5138 return NULL;
5142 /* If rhs1 is a comparison we need to move it into a
5143 separate statement. */
5144 if (TREE_CODE (rhs1) != SSA_NAME)
5146 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
5147 if (rhs1_op0_type
5148 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
5149 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
5150 vectype2, stmt_vinfo);
5151 if (rhs1_op1_type
5152 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
5153 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
5154 vectype2, stmt_vinfo);
5155 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
5156 rhs1_op0, rhs1_op1);
5157 rhs1 = tmp;
5158 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
5159 rhs1_type);
5162 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
5163 TYPE_VECTOR_SUBPARTS (vectype2)))
5164 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5165 else
5166 tmp = rhs1;
5168 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5169 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
5170 gimple_assign_rhs2 (last_stmt),
5171 gimple_assign_rhs3 (last_stmt));
5173 *type_out = vectype1;
5174 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5176 return pattern_stmt;
5179 /* Now check for binary boolean operations requiring conversion for
5180 one of operands. */
5181 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5182 return NULL;
5184 if (rhs_code != BIT_IOR_EXPR
5185 && rhs_code != BIT_XOR_EXPR
5186 && rhs_code != BIT_AND_EXPR
5187 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
5188 return NULL;
5190 rhs2 = gimple_assign_rhs2 (last_stmt);
5192 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5193 rhs2_type = integer_type_for_mask (rhs2, vinfo);
5195 if (!rhs1_type || !rhs2_type
5196 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
5197 return NULL;
5199 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
5201 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5202 if (!vectype1)
5203 return NULL;
5204 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
5206 else
5208 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
5209 if (!vectype1)
5210 return NULL;
5211 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5214 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5215 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
5217 *type_out = vectype1;
5218 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5220 return pattern_stmt;
5223 /* STMT_INFO is a load or store. If the load or store is conditional, return
5224 the boolean condition under which it occurs, otherwise return null. */
5226 static tree
5227 vect_get_load_store_mask (stmt_vec_info stmt_info)
5229 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
5231 gcc_assert (gimple_assign_single_p (def_assign));
5232 return NULL_TREE;
5235 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
5237 internal_fn ifn = gimple_call_internal_fn (def_call);
5238 int mask_index = internal_fn_mask_index (ifn);
5239 return gimple_call_arg (def_call, mask_index);
5242 gcc_unreachable ();
5245 /* Return MASK if MASK is suitable for masking an operation on vectors
5246 of type VECTYPE, otherwise convert it into such a form and return
5247 the result. Associate any conversion statements with STMT_INFO's
5248 pattern. */
5250 static tree
5251 vect_convert_mask_for_vectype (tree mask, tree vectype,
5252 stmt_vec_info stmt_info, vec_info *vinfo)
5254 tree mask_type = integer_type_for_mask (mask, vinfo);
5255 if (mask_type)
5257 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
5258 if (mask_vectype
5259 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
5260 TYPE_VECTOR_SUBPARTS (mask_vectype)))
5261 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
5263 return mask;
5266 /* Return the equivalent of:
5268 fold_convert (TYPE, VALUE)
5270 with the expectation that the operation will be vectorized.
5271 If new statements are needed, add them as pattern statements
5272 to STMT_INFO. */
5274 static tree
5275 vect_add_conversion_to_pattern (vec_info *vinfo,
5276 tree type, tree value, stmt_vec_info stmt_info)
5278 if (useless_type_conversion_p (type, TREE_TYPE (value)))
5279 return value;
5281 tree new_value = vect_recog_temp_ssa_var (type, NULL);
5282 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
5283 append_pattern_def_seq (vinfo, stmt_info, conversion,
5284 get_vectype_for_scalar_type (vinfo, type));
5285 return new_value;
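/* A sketch of how this is used (assumed example): if a gather offset has
   type "int" but the chosen offset vector type has 64-bit elements, the
   helper above would add a pattern statement along the lines of

       patt_8 = (long int) offset_5;

   and return patt_8, whereas VALUE is returned unchanged when no
   conversion is needed (useless_type_conversion_p).  */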
5288 /* Try to convert STMT_INFO into a call to a gather load or scatter store
5289 internal function. Return the final statement on success and set
5290 *TYPE_OUT to the vector type being loaded or stored.
5292 This function only handles gathers and scatters that were recognized
5293 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
5295 static gimple *
5296 vect_recog_gather_scatter_pattern (vec_info *vinfo,
5297 stmt_vec_info stmt_info, tree *type_out)
5299 /* Currently we only support this for loop vectorization. */
5300 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5301 if (!loop_vinfo)
5302 return NULL;
5304 /* Make sure that we're looking at a gather load or scatter store. */
5305 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
5306 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5307 return NULL;
5309 /* Get the boolean that controls whether the load or store happens.
5310 This is null if the operation is unconditional. */
5311 tree mask = vect_get_load_store_mask (stmt_info);
5313 /* Make sure that the target supports an appropriate internal
5314 function for the gather/scatter operation. */
5315 gather_scatter_info gs_info;
5316 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
5317 || gs_info.ifn == IFN_LAST)
5318 return NULL;
5320 /* Convert the mask to the right form. */
5321 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
5322 gs_info.element_type);
5323 if (mask)
5324 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
5325 loop_vinfo);
5326 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
5327 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
5328 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
5330 /* Get the invariant base and non-invariant offset, converting the
5331 latter to the same width as the vector elements. */
5332 tree base = gs_info.base;
5333 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
5334 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
5335 gs_info.offset, stmt_info);
5337 /* Build the new pattern statement. */
5338 tree scale = size_int (gs_info.scale);
5339 gcall *pattern_stmt;
5340 if (DR_IS_READ (dr))
5342 tree zero = build_zero_cst (gs_info.element_type);
5343 if (mask != NULL)
5344 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
5345 offset, scale, zero, mask);
5346 else
5347 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
5348 offset, scale, zero);
5349 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
5350 gimple_call_set_lhs (pattern_stmt, load_lhs);
5352 else
5354 tree rhs = vect_get_store_rhs (stmt_info);
5355 if (mask != NULL)
5356 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
5357 base, offset, scale, rhs,
5358 mask);
5359 else
5360 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
5361 base, offset, scale, rhs);
5363 gimple_call_set_nothrow (pattern_stmt, true);
5365 /* Copy across relevant vectorization info and associate DR with the
5366 new pattern statement instead of the original statement. */
5367 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5368 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
5370 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5371 *type_out = vectype;
5372 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
5374 return pattern_stmt;
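/* Illustrative example (assumed, not from the sources): a loop such as

       for (int i = 0; i < n; ++i)
	 out[i] = data[idx[i]];

   recognized as a gather would be replaced by a pattern statement of
   the form

       patt_6 = .GATHER_LOAD (base_1, offset_vec_2, scale, 0);

   or, when the access is conditional or the target only provides the
   masked variant,

       patt_6 = .MASK_GATHER_LOAD (base_1, offset_vec_2, scale, 0, mask_3);

   matching the 4- and 5-argument calls built above.  */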
5377 /* Return true if TYPE is a non-boolean integer type. These are the types
5378 that we want to consider for narrowing. */
5380 static bool
5381 vect_narrowable_type_p (tree type)
5383 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5386 /* Return true if the operation given by CODE can be truncated to N bits
5387 when only N bits of the output are needed. This is only true if bit N+1
5388 of the inputs has no effect on the low N bits of the result. */
5390 static bool
5391 vect_truncatable_operation_p (tree_code code)
5393 switch (code)
5395 case PLUS_EXPR:
5396 case MINUS_EXPR:
5397 case MULT_EXPR:
5398 case BIT_AND_EXPR:
5399 case BIT_IOR_EXPR:
5400 case BIT_XOR_EXPR:
5401 case COND_EXPR:
5402 return true;
5404 default:
5405 return false;
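/* A small worked example of why the operations above are safe to
   truncate: with 8-bit truncation,

       ((a & 0xff) + (b & 0xff)) & 0xff == (a + b) & 0xff

   because the addition is performed modulo 2^N, whereas for, say,
   division the high input bits do matter: 0x100 / 2 = 0x80 but
   (0x100 & 0xff) / 2 = 0, which is why TRUNC_DIV_EXPR is deliberately
   absent from the list.  */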
5409 /* Record that STMT_INFO could be changed from operating on TYPE to
5410 operating on a type with the precision and sign given by PRECISION
5411 and SIGN respectively. PRECISION is an arbitrary bit precision;
5412 it might not be a whole number of bytes. */
5414 static void
5415 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5416 unsigned int precision, signop sign)
5418 /* Round the precision up to a whole number of bytes. */
5419 precision = vect_element_precision (precision);
5420 if (precision < TYPE_PRECISION (type)
5421 && (!stmt_info->operation_precision
5422 || stmt_info->operation_precision > precision))
5424 stmt_info->operation_precision = precision;
5425 stmt_info->operation_sign = sign;
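/* Worked example (assuming vect_element_precision rounds the request up
   to a power of two of at least 8 bits): a statement whose result only
   needs 9 significant bits is recorded with

       operation_precision = 16

   rather than 9, since vector elements cannot have sub-byte or
   non-power-of-two widths.  */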
5429 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5430 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5431 is an arbitrary bit precision; it might not be a whole number of bytes. */
5433 static void
5434 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5435 unsigned int min_input_precision)
5437 /* This operation in isolation only requires the inputs to have
5438 MIN_INPUT_PRECISION of precision. However, that doesn't mean
5439 that MIN_INPUT_PRECISION is a natural precision for the chain
5440 as a whole. E.g. consider something like:
5442 unsigned short *x, *y;
5443 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5445 The right shift can be done on unsigned chars, and only requires the
5446 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5447 approach would mean turning a natural chain of single-vector unsigned
5448 short operations into one that truncates "*x" and then extends
5449 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5450 operation and one vector for each unsigned char operation.
5451 This would be a significant pessimization.
5453 Instead only propagate the maximum of this precision and the precision
5454 required by the users of the result. This means that we don't pessimize
5455 the case above but continue to optimize things like:
5457 unsigned char *y;
5458 unsigned short *x;
5459 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5461 Here we would truncate two vectors of *x to a single vector of
5462 unsigned chars and use single-vector unsigned char operations for
5463 everything else, rather than doing two unsigned short copies of
5464 "(*x & 0xf0) >> 4" and then truncating the result. */
5465 min_input_precision = MAX (min_input_precision,
5466 stmt_info->min_output_precision);
5468 if (min_input_precision < TYPE_PRECISION (type)
5469 && (!stmt_info->min_input_precision
5470 || stmt_info->min_input_precision > min_input_precision))
5471 stmt_info->min_input_precision = min_input_precision;
5474 /* Subroutine of vect_determine_min_output_precision. Return true if
5475 we can calculate a reduced number of output bits for STMT_INFO,
5476 whose result is LHS. */
5478 static bool
5479 vect_determine_min_output_precision_1 (vec_info *vinfo,
5480 stmt_vec_info stmt_info, tree lhs)
5482 /* Take the maximum precision required by users of the result. */
5483 unsigned int precision = 0;
5484 imm_use_iterator iter;
5485 use_operand_p use;
5486 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5488 gimple *use_stmt = USE_STMT (use);
5489 if (is_gimple_debug (use_stmt))
5490 continue;
5491 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5492 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5493 return false;
5494 /* The input precision recorded for COND_EXPRs applies only to the
5495 "then" and "else" values. */
5496 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5497 if (assign
5498 && gimple_assign_rhs_code (assign) == COND_EXPR
5499 && use->use != gimple_assign_rhs2_ptr (assign)
5500 && use->use != gimple_assign_rhs3_ptr (assign))
5501 return false;
5502 precision = MAX (precision, use_stmt_info->min_input_precision);
5505 if (dump_enabled_p ())
5506 dump_printf_loc (MSG_NOTE, vect_location,
5507 "only the low %d bits of %T are significant\n",
5508 precision, lhs);
5509 stmt_info->min_output_precision = precision;
5510 return true;
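/* Example of the backward propagation above (assumed source):

       int prod = (int) a * (int) b;
       *out = (unsigned char) prod;

   Here the only non-debug user of "prod" is the narrowing conversion,
   whose recorded min_input_precision is 8, so min_output_precision for
   the multiplication is set to 8 as well.  */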
5513 /* Calculate min_output_precision for STMT_INFO. */
5515 static void
5516 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5518 /* We're only interested in statements with a narrowable result. */
5519 tree lhs = gimple_get_lhs (stmt_info->stmt);
5520 if (!lhs
5521 || TREE_CODE (lhs) != SSA_NAME
5522 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5523 return;
5525 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5526 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5529 /* Use range information to decide whether STMT (described by STMT_INFO)
5530 could be done in a narrower type. This is effectively a forward
5531 propagation, since it uses context-independent information that applies
5532 to all users of an SSA name. */
5534 static void
5535 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5537 tree lhs = gimple_assign_lhs (stmt);
5538 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5539 return;
5541 tree type = TREE_TYPE (lhs);
5542 if (!vect_narrowable_type_p (type))
5543 return;
5545 /* First see whether we have any useful range information for the result. */
5546 unsigned int precision = TYPE_PRECISION (type);
5547 signop sign = TYPE_SIGN (type);
5548 wide_int min_value, max_value;
5549 if (!vect_get_range_info (lhs, &min_value, &max_value))
5550 return;
5552 tree_code code = gimple_assign_rhs_code (stmt);
5553 unsigned int nops = gimple_num_ops (stmt);
5555 if (!vect_truncatable_operation_p (code))
5556 /* Check that all relevant input operands are compatible, and update
5557 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5558 for (unsigned int i = 1; i < nops; ++i)
5560 tree op = gimple_op (stmt, i);
5561 if (TREE_CODE (op) == INTEGER_CST)
5563 /* Don't require the integer to have TYPE (which it might
5564 not for things like shift amounts, etc.), but do require it
5565 to fit the type. */
5566 if (!int_fits_type_p (op, type))
5567 return;
5569 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5570 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5572 else if (TREE_CODE (op) == SSA_NAME)
5574 /* Ignore codes that don't take uniform arguments. */
5575 if (!types_compatible_p (TREE_TYPE (op), type))
5576 return;
5578 wide_int op_min_value, op_max_value;
5579 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5580 return;
5582 min_value = wi::min (min_value, op_min_value, sign);
5583 max_value = wi::max (max_value, op_max_value, sign);
5585 else
5586 return;
5589 /* Try to switch signed types for unsigned types if we can.
5590 This is better for two reasons. First, unsigned ops tend
5591 to be cheaper than signed ops. Second, it means that we can
5592 handle things like:
5594 signed char c;
5595 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
as:

 5599 signed char c;
5600 unsigned short res_1 = (unsigned short) c & 0xff00;
5601 int res = (int) res_1;
5603 where the intermediate result res_1 has unsigned rather than
5604 signed type. */
5605 if (sign == SIGNED && !wi::neg_p (min_value))
5606 sign = UNSIGNED;
5608 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5609 unsigned int precision1 = wi::min_precision (min_value, sign);
5610 unsigned int precision2 = wi::min_precision (max_value, sign);
5611 unsigned int value_precision = MAX (precision1, precision2);
5612 if (value_precision >= precision)
5613 return;
5615 if (dump_enabled_p ())
5616 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5617 " without loss of precision: %G",
5618 sign == SIGNED ? "signed" : "unsigned",
5619 value_precision, (gimple *) stmt);
5621 vect_set_operation_type (stmt_info, type, value_precision, sign);
5622 vect_set_min_input_precision (stmt_info, type, value_precision);
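/* Concrete illustration of the forward propagation above (assumed
   source, relying on range information for loads of narrow types):

       unsigned char *a, *b;
       int sum = a[i] + b[i];	    // range [0, 510]

   The result needs only 9 significant bits, so the addition can be
   recorded as an unsigned 16-bit operation after rounding, which is
   what later lets the over-widening pattern replace the "int"
   arithmetic with "unsigned short" arithmetic.  */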
5625 /* Use information about the users of STMT's result to decide whether
5626 STMT (described by STMT_INFO) could be done in a narrower type.
5627 This is effectively a backward propagation. */
5629 static void
5630 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5632 tree_code code = gimple_assign_rhs_code (stmt);
5633 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5634 tree type = TREE_TYPE (gimple_op (stmt, opno));
5635 if (!vect_narrowable_type_p (type))
5636 return;
5638 unsigned int precision = TYPE_PRECISION (type);
5639 unsigned int operation_precision, min_input_precision;
5640 switch (code)
5642 CASE_CONVERT:
5643 /* Only the bits that contribute to the output matter. Don't change
5644 the precision of the operation itself. */
5645 operation_precision = precision;
5646 min_input_precision = stmt_info->min_output_precision;
5647 break;
5649 case LSHIFT_EXPR:
5650 case RSHIFT_EXPR:
5652 tree shift = gimple_assign_rhs2 (stmt);
5653 if (TREE_CODE (shift) != INTEGER_CST
5654 || !wi::ltu_p (wi::to_widest (shift), precision))
5655 return;
5656 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5657 if (code == LSHIFT_EXPR)
5659 /* Avoid creating an undefined shift.
5661 ??? We could instead use min_output_precision as-is and
5662 optimize out-of-range shifts to zero. However, only
5663 degenerate testcases shift away all their useful input data,
5664 and it isn't natural to drop input operations in the middle
5665 of vectorization. This sort of thing should really be
5666 handled before vectorization. */
5667 operation_precision = MAX (stmt_info->min_output_precision,
5668 const_shift + 1);
5669 /* We need CONST_SHIFT fewer bits of the input. */
5670 min_input_precision = (MAX (operation_precision, const_shift)
5671 - const_shift);
5673 else
5675 /* We need CONST_SHIFT extra bits to do the operation. */
5676 operation_precision = (stmt_info->min_output_precision
5677 + const_shift);
5678 min_input_precision = operation_precision;
5680 break;
5683 default:
5684 if (vect_truncatable_operation_p (code))
5686 /* Input bit N has no effect on output bits N-1 and lower. */
5687 operation_precision = stmt_info->min_output_precision;
5688 min_input_precision = operation_precision;
5689 break;
5691 return;
5694 if (operation_precision < precision)
5696 if (dump_enabled_p ())
5697 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5698 " without affecting users: %G",
5699 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5700 operation_precision, (gimple *) stmt);
5701 vect_set_operation_type (stmt_info, type, operation_precision,
5702 TYPE_SIGN (type));
5704 vect_set_min_input_precision (stmt_info, type, min_input_precision);
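/* Worked example for the shift handling above (assumed numbers): if only
   the low 8 bits of

       res = x >> 4;

   are needed by the users of res, then operation_precision becomes
   8 + 4 = 12 and min_input_precision is 12 as well, i.e. the shift must
   see at least 12 bits of x to produce 8 correct output bits.  */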
5707 /* Return true if the statement described by STMT_INFO sets a boolean
5708 SSA_NAME and if we know how to vectorize this kind of statement using
5709 vector mask types. */
5711 static bool
5712 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5714 tree lhs = gimple_get_lhs (stmt_info->stmt);
5715 if (!lhs
5716 || TREE_CODE (lhs) != SSA_NAME
5717 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5718 return false;
5720 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5722 tree_code rhs_code = gimple_assign_rhs_code (assign);
5723 switch (rhs_code)
5725 CASE_CONVERT:
5726 case SSA_NAME:
5727 case BIT_NOT_EXPR:
5728 case BIT_IOR_EXPR:
5729 case BIT_XOR_EXPR:
5730 case BIT_AND_EXPR:
5731 return true;
5733 default:
5734 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5737 else if (is_a <gphi *> (stmt_info->stmt))
5738 return true;
5739 return false;
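/* Illustrative example (assumed source): both statements produced for

       bool ok = (a[i] < b[i]) & (c[i] != d[i]);

   satisfy the test above - the two comparisons because their codes are
   in tcc_comparison, and the BIT_AND_EXPR because it combines boolean
   SSA names - so all of them are candidates for vector mask types.  */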
5742 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5743 a vector mask type instead of a normal vector type. Record the
5744 result in STMT_INFO->mask_precision. */
5746 static void
5747 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5749 if (!possible_vector_mask_operation_p (stmt_info))
5750 return;
5752 /* If at least one boolean input uses a vector mask type,
5753 pick the mask type with the narrowest elements.
5755 ??? This is the traditional behavior. It should always produce
5756 the smallest number of operations, but isn't necessarily the
5757 optimal choice. For example, if we have:
5759 a = b & c
5761 where:
5763 - the user of a wants it to have a mask type for 16-bit elements (M16)
5764 - b also uses M16
5765 - c uses a mask type for 8-bit elements (M8)
5767 then picking M8 gives:
5769 - 1 M16->M8 pack for b
5770 - 1 M8 AND for a
5771 - 2 M8->M16 unpacks for the user of a
5773 whereas picking M16 would have given:
5775 - 2 M8->M16 unpacks for c
5776 - 2 M16 ANDs for a
5778 The number of operations is equal, but M16 would have given
5779 a shorter dependency chain and allowed more ILP. */
5780 unsigned int precision = ~0U;
5781 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5783 unsigned int nops = gimple_num_ops (assign);
5784 for (unsigned int i = 1; i < nops; ++i)
5786 tree rhs = gimple_op (assign, i);
5787 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5788 continue;
5790 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5791 if (!def_stmt_info)
5792 /* Don't let external or constant operands influence the choice.
5793 We can convert them to whichever vector type we pick. */
5794 continue;
5796 if (def_stmt_info->mask_precision)
5798 if (precision > def_stmt_info->mask_precision)
5799 precision = def_stmt_info->mask_precision;
5803 /* If the statement compares two values that shouldn't use vector masks,
5804 try comparing the values as normal scalars instead. */
5805 tree_code rhs_code = gimple_assign_rhs_code (assign);
5806 if (precision == ~0U
5807 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5809 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5810 scalar_mode mode;
5811 tree vectype, mask_type;
5812 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5813 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5814 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5815 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5816 precision = GET_MODE_BITSIZE (mode);
5819 else
5821 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5822 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5824 tree rhs = gimple_phi_arg_def (phi, i);
5826 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5827 if (!def_stmt_info)
5828 /* Don't let external or constant operands influence the choice.
5829 We can convert them to whichever vector type we pick. */
5830 continue;
5832 if (def_stmt_info->mask_precision)
5834 if (precision > def_stmt_info->mask_precision)
5835 precision = def_stmt_info->mask_precision;
5840 if (dump_enabled_p ())
5842 if (precision == ~0U)
5843 dump_printf_loc (MSG_NOTE, vect_location,
5844 "using normal nonmask vectors for %G",
5845 stmt_info->stmt);
5846 else
5847 dump_printf_loc (MSG_NOTE, vect_location,
5848 "using boolean precision %d for %G",
5849 precision, stmt_info->stmt);
5852 stmt_info->mask_precision = precision;
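/* As a rough example of the choice made above (assumed types, assuming
   the target can vectorize both comparisons directly): for

       bool ok = (a[i] < b[i]) & (c[i] < d[i]);

   where a/b point to chars and c/d to ints, the first comparison
   records a mask precision of 8 and the second 32, so the BIT_AND_EXPR
   picks the narrower value 8, following the "narrowest elements" rule
   described in the comment above.  */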
5855 /* Handle vect_determine_precisions for STMT_INFO, given that we
5856 have already done so for the users of its result. */
5858 void
5859 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5861 vect_determine_min_output_precision (vinfo, stmt_info);
5862 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5864 vect_determine_precisions_from_range (stmt_info, stmt);
5865 vect_determine_precisions_from_users (stmt_info, stmt);
5869 /* Walk backwards through the vectorizable region to determine the
5870 values of these fields:
5872 - min_output_precision
5873 - min_input_precision
5874 - operation_precision
5875 - operation_sign. */
5877 void
5878 vect_determine_precisions (vec_info *vinfo)
5880 DUMP_VECT_SCOPE ("vect_determine_precisions");
5882 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5884 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5885 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5886 unsigned int nbbs = loop->num_nodes;
5888 for (unsigned int i = 0; i < nbbs; i++)
5890 basic_block bb = bbs[i];
5891 for (auto gsi = gsi_start_phis (bb);
5892 !gsi_end_p (gsi); gsi_next (&gsi))
5894 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5895 if (stmt_info)
5896 vect_determine_mask_precision (vinfo, stmt_info);
5898 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5899 if (!is_gimple_debug (gsi_stmt (si)))
5900 vect_determine_mask_precision
5901 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5903 for (unsigned int i = 0; i < nbbs; i++)
5905 basic_block bb = bbs[nbbs - i - 1];
5906 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5907 !gsi_end_p (si); gsi_prev (&si))
5908 if (!is_gimple_debug (gsi_stmt (si)))
5909 vect_determine_stmt_precisions
5910 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5911 for (auto gsi = gsi_start_phis (bb);
5912 !gsi_end_p (gsi); gsi_next (&gsi))
5914 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5915 if (stmt_info)
5916 vect_determine_stmt_precisions (vinfo, stmt_info);
5920 else
5922 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5923 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5925 basic_block bb = bb_vinfo->bbs[i];
5926 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5928 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5929 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5930 vect_determine_mask_precision (vinfo, stmt_info);
5932 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5934 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5935 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5936 vect_determine_mask_precision (vinfo, stmt_info);
5939 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5941 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5942 !gsi_end_p (gsi); gsi_prev (&gsi))
5944 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5945 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5946 vect_determine_stmt_precisions (vinfo, stmt_info);
5948 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5949 !gsi_end_p (gsi); gsi_next (&gsi))
5951 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5952 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5953 vect_determine_stmt_precisions (vinfo, stmt_info);
5959 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5961 struct vect_recog_func
5963 vect_recog_func_ptr fn;
5964 const char *name;
5967 /* Note that ordering matters - the first pattern matching on a stmt is
5968 taken, which usually means the more complex one needs to precede the
5969 less complex ones (widen_sum only after dot_prod or sad for example).  */
5970 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5971 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
5972 { vect_recog_bit_insert_pattern, "bit_insert" },
5973 { vect_recog_over_widening_pattern, "over_widening" },
5974 /* Must come after over_widening, which narrows the shift as much as
5975 possible beforehand. */
5976 { vect_recog_average_pattern, "average" },
5977 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5978 { vect_recog_mulhs_pattern, "mult_high" },
5979 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5980 { vect_recog_widen_mult_pattern, "widen_mult" },
5981 { vect_recog_dot_prod_pattern, "dot_prod" },
5982 { vect_recog_sad_pattern, "sad" },
5983 { vect_recog_widen_sum_pattern, "widen_sum" },
5984 { vect_recog_pow_pattern, "pow" },
5985 { vect_recog_popcount_pattern, "popcount" },
5986 { vect_recog_widen_shift_pattern, "widen_shift" },
5987 { vect_recog_rotate_pattern, "rotate" },
5988 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5989 { vect_recog_divmod_pattern, "divmod" },
5990 { vect_recog_mult_pattern, "mult" },
5991 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5992 { vect_recog_bool_pattern, "bool" },
5993 /* This must come before mask conversion, and includes the parts
5994 of mask conversion that are needed for gather and scatter
5995 internal functions. */
5996 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5997 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5998 { vect_recog_widen_plus_pattern, "widen_plus" },
5999 { vect_recog_widen_minus_pattern, "widen_minus" },
6002 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
6004 /* Mark statements that are involved in a pattern. */
6006 void
6007 vect_mark_pattern_stmts (vec_info *vinfo,
6008 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
6009 tree pattern_vectype)
6011 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
6012 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6014 gimple *orig_pattern_stmt = NULL;
6015 if (is_pattern_stmt_p (orig_stmt_info))
6017 /* We're replacing a statement in an existing pattern definition
6018 sequence. */
6019 orig_pattern_stmt = orig_stmt_info->stmt;
6020 if (dump_enabled_p ())
6021 dump_printf_loc (MSG_NOTE, vect_location,
6022 "replacing earlier pattern %G", orig_pattern_stmt);
6024 /* To keep the book-keeping simple, just swap the lhs of the
6025 old and new statements, so that the old one has a valid but
6026 unused lhs. */
6027 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
6028 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
6029 gimple_set_lhs (pattern_stmt, old_lhs);
6031 if (dump_enabled_p ())
6032 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
6034 /* Switch to the statement that ORIG replaces. */
6035 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
6037 /* We shouldn't be replacing the main pattern statement. */
6038 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
6039 != orig_pattern_stmt);
6042 if (def_seq)
6043 for (gimple_stmt_iterator si = gsi_start (def_seq);
6044 !gsi_end_p (si); gsi_next (&si))
6046 if (dump_enabled_p ())
6047 dump_printf_loc (MSG_NOTE, vect_location,
6048 "extra pattern stmt: %G", gsi_stmt (si));
6049 stmt_vec_info pattern_stmt_info
6050 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
6051 orig_stmt_info, pattern_vectype);
6052 /* Stmts in the def sequence are not vectorizable cycle or
6053 induction defs, instead they should all be vect_internal_def
6054 feeding the main pattern stmt which retains this def type. */
6055 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
6058 if (orig_pattern_stmt)
6060 vect_init_pattern_stmt (vinfo, pattern_stmt,
6061 orig_stmt_info, pattern_vectype);
6063 /* Insert all the new pattern statements before the original one. */
6064 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6065 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
6066 orig_def_seq);
6067 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
6068 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
6070 /* Remove the pattern statement that this new pattern replaces. */
6071 gsi_remove (&gsi, false);
6073 else
6074 vect_set_pattern_stmt (vinfo,
6075 pattern_stmt, orig_stmt_info, pattern_vectype);
6077 /* Transfer reduction path info to the pattern. */
6078 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
6080 gimple_match_op op;
6081 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
6082 gcc_unreachable ();
6083 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
6084 /* Search the pattern def sequence and the main pattern stmt. Note
6085 we may have inserted all into a containing pattern def sequence
6086 so the following is a bit awkward. */
6087 gimple_stmt_iterator si;
6088 gimple *s;
6089 if (def_seq)
6091 si = gsi_start (def_seq);
6092 s = gsi_stmt (si);
6093 gsi_next (&si);
6095 else
6097 si = gsi_none ();
6098 s = pattern_stmt;
6102 bool found = false;
6103 if (gimple_extract_op (s, &op))
6104 for (unsigned i = 0; i < op.num_ops; ++i)
6105 if (op.ops[i] == lookfor)
6107 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
6108 lookfor = gimple_get_lhs (s);
6109 found = true;
6110 break;
6112 if (s == pattern_stmt)
6114 if (!found && dump_enabled_p ())
6115 dump_printf_loc (MSG_NOTE, vect_location,
6116 "failed to update reduction index.\n");
6117 break;
6119 if (gsi_end_p (si))
6120 s = pattern_stmt;
6121 else
6123 s = gsi_stmt (si);
6124 if (s == pattern_stmt)
6125 /* Found the end inside a bigger pattern def seq. */
6126 si = gsi_none ();
6127 else
6128 gsi_next (&si);
6130 } while (1);
6134 /* Function vect_pattern_recog_1
6136 Input:
6137 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
6138 computation pattern.
6139 STMT_INFO: A stmt from which the pattern search should start.
6141 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
6142 a sequence of statements that has the same functionality and can be
6143 used to replace STMT_INFO. It returns the last statement in the sequence
6144 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
6145 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
6146 statement, having first checked that the target supports the new operation
6147 in that type.
6149 This function also does some bookkeeping, as explained in the documentation
6150 for vect_pattern_recog. */
6152 static void
6153 vect_pattern_recog_1 (vec_info *vinfo,
6154 vect_recog_func *recog_func, stmt_vec_info stmt_info)
6156 gimple *pattern_stmt;
6157 loop_vec_info loop_vinfo;
6158 tree pattern_vectype;
6160 /* If this statement has already been replaced with pattern statements,
6161 leave the original statement alone, since the first match wins.
6162 Instead try to match against the definition statements that feed
6163 the main pattern statement. */
6164 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6166 gimple_stmt_iterator gsi;
6167 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6168 !gsi_end_p (gsi); gsi_next (&gsi))
6169 vect_pattern_recog_1 (vinfo, recog_func,
6170 vinfo->lookup_stmt (gsi_stmt (gsi)));
6171 return;
6174 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6175 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
6176 if (!pattern_stmt)
6178 /* Clear any half-formed pattern definition sequence. */
6179 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
6180 return;
6183 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6185 /* Found a vectorizable pattern. */
6186 if (dump_enabled_p ())
6187 dump_printf_loc (MSG_NOTE, vect_location,
6188 "%s pattern recognized: %G",
6189 recog_func->name, pattern_stmt);
6191 /* Mark the stmts that are involved in the pattern. */
6192 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
6194 /* Patterns cannot be vectorized using SLP, because they change the order of
6195 computation. */
6196 if (loop_vinfo)
6198 unsigned ix, ix2;
6199 stmt_vec_info *elem_ptr;
6200 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
6201 elem_ptr, *elem_ptr == stmt_info);
6206 /* Function vect_pattern_recog
6208 Input:
6209 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
6210 computation idioms.
6212 Output - for each computation idiom that is detected we create a new stmt
6213 that provides the same functionality and that can be vectorized. We
6214 also record some information in the struct_stmt_info of the relevant
6215 stmts, as explained below:
6217 At the entry to this function we have the following stmts, with the
6218 following initial value in the STMT_VINFO fields:
6220 stmt in_pattern_p related_stmt vec_stmt
6221 S1: a_i = .... - - -
6222 S2: a_2 = ..use(a_i).. - - -
6223 S3: a_1 = ..use(a_2).. - - -
6224 S4: a_0 = ..use(a_1).. - - -
6225 S5: ... = ..use(a_0).. - - -
6227 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
6228 represented by a single stmt. We then:
6229 - create a new stmt S6 equivalent to the pattern (the stmt is not
6230 inserted into the code)
6231 - fill in the STMT_VINFO fields as follows:
6233 in_pattern_p related_stmt vec_stmt
6234 S1: a_i = .... - - -
6235 S2: a_2 = ..use(a_i).. - - -
6236 S3: a_1 = ..use(a_2).. - - -
6237 S4: a_0 = ..use(a_1).. true S6 -
6238 '---> S6: a_new = .... - S4 -
6239 S5: ... = ..use(a_0).. - - -
6241 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
6242 to each other through the RELATED_STMT field).
6244 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
6245 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
6246 remain irrelevant unless used by stmts other than S4.
6248 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
6249 (because they are marked as irrelevant). It will vectorize S6, and record
6250 a pointer to the new vector stmt VS6 from S6 (as usual).
6251 S4 will be skipped, and S5 will be vectorized as usual:
6253 in_pattern_p related_stmt vec_stmt
6254 S1: a_i = .... - - -
6255 S2: a_2 = ..use(a_i).. - - -
6256 S3: a_1 = ..use(a_2).. - - -
6257 > VS6: va_new = .... - - -
6258 S4: a_0 = ..use(a_1).. true S6 VS6
6259 '---> S6: a_new = .... - S4 VS6
6260 > VS5: ... = ..vuse(va_new).. - - -
6261 S5: ... = ..use(a_0).. - - -
6263 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
6264 elsewhere), and we'll end up with:
6266 VS6: va_new = ....
6267 VS5: ... = ..vuse(va_new)..
6269 In case of more than one pattern statements, e.g., widen-mult with
6270 intermediate type:
6272 S1 a_t = ;
6273 S2 a_T = (TYPE) a_t;
6274 '--> S3: a_it = (interm_type) a_t;
6275 S4 prod_T = a_T * CONST;
6276 '--> S5: prod_T' = a_it w* CONST;
6278 there may be other users of a_T outside the pattern. In that case S2 will
6279 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
6280 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
6281 be recorded in S3. */
6283 void
6284 vect_pattern_recog (vec_info *vinfo)
6286 class loop *loop;
6287 basic_block *bbs;
6288 unsigned int nbbs;
6289 gimple_stmt_iterator si;
6290 unsigned int i, j;
6292 vect_determine_precisions (vinfo);
6294 DUMP_VECT_SCOPE ("vect_pattern_recog");
6296 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6298 loop = LOOP_VINFO_LOOP (loop_vinfo);
6299 bbs = LOOP_VINFO_BBS (loop_vinfo);
6300 nbbs = loop->num_nodes;
6302 /* Scan through the loop stmts, applying the pattern recognition
6303 functions starting at each stmt visited: */
6304 for (i = 0; i < nbbs; i++)
6306 basic_block bb = bbs[i];
6307 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6309 if (is_gimple_debug (gsi_stmt (si)))
6310 continue;
6311 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
6312 /* Scan over all generic vect_recog_xxx_pattern functions. */
6313 for (j = 0; j < NUM_PATTERNS; j++)
6314 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
6315 stmt_info);
6319 else
6321 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6322 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6323 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
6324 !gsi_end_p (gsi); gsi_next (&gsi))
6326 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
6327 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
6328 continue;
6330 /* Scan over all generic vect_recog_xxx_pattern functions. */
6331 for (j = 0; j < NUM_PATTERNS; j++)
6332 vect_pattern_recog_1 (vinfo,
6333 &vect_vect_recog_func_ptrs[j], stmt_info);
6337 /* After this no more add_stmt calls are allowed. */
6338 vinfo->stmt_vec_info_ro = true;