gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
58 value_range_kind vr_type = get_range_info (var, min_value, max_value);
59 wide_int nonzero = get_nonzero_bits (var);
60 signop sgn = TYPE_SIGN (TREE_TYPE (var));
61 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 nonzero, sgn) == VR_RANGE)
64 if (dump_enabled_p ())
66 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 dump_printf (MSG_NOTE, " has range [");
68 dump_hex (MSG_NOTE, *min_value);
69 dump_printf (MSG_NOTE, ", ");
70 dump_hex (MSG_NOTE, *max_value);
71 dump_printf (MSG_NOTE, "]\n");
73 return true;
75 else
77 if (dump_enabled_p ())
79 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 dump_printf (MSG_NOTE, " has no range info\n");
82 return false;
 86 /* Report that we've found an instance of the pattern called NAME in
87 statement STMT. */
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
92 if (dump_enabled_p ())
93 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97 return the pattern statement's stmt_vec_info. Set its vector type to
98 VECTYPE if it doesn't have one already. */
100 static stmt_vec_info
101 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
102 tree vectype)
104 vec_info *vinfo = orig_stmt_info->vinfo;
105 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
106 if (pattern_stmt_info == NULL)
107 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
108 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
110 pattern_stmt_info->pattern_stmt_p = true;
111 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
112 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
113 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
114 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
115 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
116 return pattern_stmt_info;
119 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
120 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
121 have one already. */
123 static void
124 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
125 tree vectype)
127 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
128 STMT_VINFO_RELATED_STMT (orig_stmt_info)
129 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
132 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
133 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
134 be different from the vector type of the final pattern statement. */
136 static inline void
137 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
138 tree vectype = NULL_TREE)
140 vec_info *vinfo = stmt_info->vinfo;
141 if (vectype)
143 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
144 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
146 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
147 new_stmt);
150 /* The caller wants to perform new operations on vect_external variable
151 VAR, so that the result of the operations would also be vect_external.
152 Return the edge on which the operations can be performed, if one exists.
153 Return null if the operations should instead be treated as part of
154 the pattern that needs them. */
156 static edge
157 vect_get_external_def_edge (vec_info *vinfo, tree var)
159 edge e = NULL;
160 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
162 e = loop_preheader_edge (loop_vinfo->loop);
163 if (!SSA_NAME_IS_DEFAULT_DEF (var))
165 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
166 if (bb == NULL
167 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
168 e = NULL;
171 return e;
174 /* Return true if the target supports a vector version of CODE,
175 where CODE is known to map to a direct optab. ITYPE specifies
176 the type of (some of) the scalar inputs and OTYPE specifies the
177 type of the scalar result.
179 If CODE allows the inputs and outputs to have different type
180 (such as for WIDEN_SUM_EXPR), it is the input mode rather
181 than the output mode that determines the appropriate target pattern.
182 Operand 0 of the target pattern then specifies the mode that the output
183 must have.
185 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
186 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
187 is nonnull. */
189 static bool
190 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
191 tree itype, tree *vecotype_out,
192 tree *vecitype_out = NULL)
194 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
195 if (!vecitype)
196 return false;
198 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
199 if (!vecotype)
200 return false;
202 optab optab = optab_for_tree_code (code, vecitype, optab_default);
203 if (!optab)
204 return false;
206 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
207 if (icode == CODE_FOR_nothing
208 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
209 return false;
211 *vecotype_out = vecotype;
212 if (vecitype_out)
213 *vecitype_out = vecitype;
214 return true;
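/* A usage sketch (mirroring the dot-product recognizer later in this file):
   ask whether the target has a vector DOT_PROD_EXPR that takes HALF_TYPE
   inputs and produces TYPE outputs, and fetch both vector types:

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR,
                                           half_type, type_out, &half_vectype))
       return NULL;

   On success *TYPE_OUT is the vector form of TYPE and HALF_VECTYPE is the
   vector form of HALF_TYPE.  */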
217 /* Round bit precision PRECISION up to a full element. */
219 static unsigned int
220 vect_element_precision (unsigned int precision)
222 precision = 1 << ceil_log2 (precision);
223 return MAX (precision, BITS_PER_UNIT);
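/* For example (illustrative values): a precision of 9 bits rounds up to 16,
   17 rounds up to 32, and anything below BITS_PER_UNIT (normally 8) is
   rounded up to a full byte, so 3 becomes 8.  */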
226 /* If OP is defined by a statement that's being considered for vectorization,
227 return information about that statement, otherwise return NULL. */
229 static stmt_vec_info
230 vect_get_internal_def (vec_info *vinfo, tree op)
232 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
233 if (def_stmt_info
234 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
235 return def_stmt_info;
236 return NULL;
239 /* Check whether NAME, an ssa-name used in STMT_VINFO,
240 is a result of a type promotion, such that:
241 DEF_STMT: NAME = NOP (name0)
242 If CHECK_SIGN is TRUE, check that either both types are signed or both are
243 unsigned. */
245 static bool
246 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
247 tree *orig_type, gimple **def_stmt, bool *promotion)
249 tree type = TREE_TYPE (name);
250 tree oprnd0;
251 enum vect_def_type dt;
253 stmt_vec_info def_stmt_info;
254 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
255 def_stmt))
256 return false;
258 if (dt != vect_internal_def
259 && dt != vect_external_def && dt != vect_constant_def)
260 return false;
262 if (!*def_stmt)
263 return false;
265 if (!is_gimple_assign (*def_stmt))
266 return false;
268 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
269 return false;
271 oprnd0 = gimple_assign_rhs1 (*def_stmt);
273 *orig_type = TREE_TYPE (oprnd0);
274 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
275 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
276 return false;
278 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
279 *promotion = true;
280 else
281 *promotion = false;
283 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
284 return false;
286 return true;
289 /* Holds information about an input operand after some sign changes
290 and type promotions have been peeled away. */
291 class vect_unpromoted_value {
292 public:
293 vect_unpromoted_value ();
295 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
297 /* The value obtained after peeling away zero or more casts. */
298 tree op;
300 /* The type of OP. */
301 tree type;
303 /* The definition type of OP. */
304 vect_def_type dt;
306 /* If OP is the result of peeling at least one cast, and if the cast
307 of OP itself is a vectorizable statement, CASTER identifies that
308 statement, otherwise it is null. */
309 stmt_vec_info caster;
312 inline vect_unpromoted_value::vect_unpromoted_value ()
313 : op (NULL_TREE),
314 type (NULL_TREE),
315 dt (vect_uninitialized_def),
316 caster (NULL)
320 /* Set the operand to OP_IN, its definition type to DT_IN, and the
321 statement that casts it to CASTER_IN. */
323 inline void
324 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
325 stmt_vec_info caster_in)
327 op = op_in;
328 type = TREE_TYPE (op);
329 dt = dt_in;
330 caster = caster_in;
333 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
334 to reach some vectorizable inner operand OP', continuing as long as it
335 is possible to convert OP' back to OP using a possible sign change
336 followed by a possible promotion P. Return this OP', or null if OP is
337 not a vectorizable SSA name. If there is a promotion P, describe its
338 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
339 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
340 have more than one user.
342 A successful return means that it is possible to go from OP' to OP
343 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
344 whereas the cast from UNPROM to OP might be a promotion, a sign
345 change, or a nop.
347 E.g. say we have:
349 signed short *ptr = ...;
350 signed short C = *ptr;
351 unsigned short B = (unsigned short) C; // sign change
352 signed int A = (signed int) B; // unsigned promotion
353 ...possible other uses of A...
354 unsigned int OP = (unsigned int) A; // sign change
356 In this case it's possible to go directly from C to OP using:
358 OP = (unsigned int) (unsigned short) C;
359 +------------+ +--------------+
360 promotion sign change
362 so OP' would be C. The input to the promotion is B, so UNPROM
363 would describe B. */
365 static tree
366 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
367 vect_unpromoted_value *unprom,
368 bool *single_use_p = NULL)
370 tree res = NULL_TREE;
371 tree op_type = TREE_TYPE (op);
372 unsigned int orig_precision = TYPE_PRECISION (op_type);
373 unsigned int min_precision = orig_precision;
374 stmt_vec_info caster = NULL;
375 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
377 /* See whether OP is simple enough to vectorize. */
378 stmt_vec_info def_stmt_info;
379 gimple *def_stmt;
380 vect_def_type dt;
381 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
382 break;
384 /* If OP is the input of a demotion, skip over it to see whether
385 OP is itself the result of a promotion. If so, the combined
386 effect of the promotion and the demotion might fit the required
387 pattern, otherwise neither operation fits.
389 This copes with cases such as the result of an arithmetic
390 operation being truncated before being stored, and where that
391 arithmetic operation has been recognized as an over-widened one. */
392 if (TYPE_PRECISION (op_type) <= min_precision)
394 /* Use OP as the UNPROM described above if we haven't yet
395 found a promotion, or if using the new input preserves the
396 sign of the previous promotion. */
397 if (!res
398 || TYPE_PRECISION (unprom->type) == orig_precision
399 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
401 unprom->set_op (op, dt, caster);
402 min_precision = TYPE_PRECISION (op_type);
404 /* Stop if we've already seen a promotion and if this
405 conversion does more than change the sign. */
406 else if (TYPE_PRECISION (op_type)
407 != TYPE_PRECISION (unprom->type))
408 break;
410 /* The sequence now extends to OP. */
411 res = op;
414 /* See whether OP is defined by a cast. Record it as CASTER if
415 the cast is potentially vectorizable. */
416 if (!def_stmt)
417 break;
418 caster = def_stmt_info;
420 /* Ignore pattern statements, since we don't link uses for them. */
421 if (caster
422 && single_use_p
423 && !STMT_VINFO_RELATED_STMT (caster)
424 && !has_single_use (res))
425 *single_use_p = false;
427 gassign *assign = dyn_cast <gassign *> (def_stmt);
428 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
429 break;
431 /* Continue with the input to the cast. */
432 op = gimple_assign_rhs1 (def_stmt);
433 op_type = TREE_TYPE (op);
435 return res;
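/* A usage sketch, following the reduction recognizers below: peel the
   promotions off one operand of a reduction and give up if nothing
   vectorizable is left.

     vect_unpromoted_value unprom;
     oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom);
     if (!oprnd0)
       return NULL;

   OPRND0 is then the innermost vectorizable value and UNPROM describes the
   input to the final promotion, if there was one.  */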
438 /* OP is an integer operand to an operation that returns TYPE, and we
439 want to treat the operation as a widening one. So far we can treat
440 it as widening from *COMMON_TYPE.
442 Return true if OP is suitable for such a widening operation,
443 either widening from *COMMON_TYPE or from some supertype of it.
444 Update *COMMON_TYPE to the supertype in the latter case.
446 SHIFT_P is true if OP is a shift amount. */
448 static bool
449 vect_joust_widened_integer (tree type, bool shift_p, tree op,
450 tree *common_type)
452 /* Calculate the minimum precision required by OP, without changing
453 the sign of either operand. */
454 unsigned int precision;
455 if (shift_p)
457 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
458 return false;
459 precision = TREE_INT_CST_LOW (op);
461 else
463 precision = wi::min_precision (wi::to_widest (op),
464 TYPE_SIGN (*common_type));
465 if (precision * 2 > TYPE_PRECISION (type))
466 return false;
469 /* If OP requires a wider type, switch to that type. The checks
470 above ensure that this is still narrower than the result. */
471 precision = vect_element_precision (precision);
472 if (TYPE_PRECISION (*common_type) < precision)
473 *common_type = build_nonstandard_integer_type
474 (precision, TYPE_UNSIGNED (*common_type));
475 return true;
478 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
479 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
481 static bool
482 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
484 if (types_compatible_p (*common_type, new_type))
485 return true;
487 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
488 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
489 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
490 return true;
492 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
493 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
494 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
496 *common_type = new_type;
497 return true;
500 /* We have mismatched signs, with the signed type being
501 no wider than the unsigned type. In this case we need
502 a wider signed type. */
503 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
504 TYPE_PRECISION (new_type));
505 precision *= 2;
506 if (precision * 2 > TYPE_PRECISION (type))
507 return false;
509 *common_type = build_nonstandard_integer_type (precision, false);
510 return true;
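/* Two illustrative cases (types chosen for the example): if *COMMON_TYPE is
   unsigned char and NEW_TYPE is signed short, the signed short can represent
   every unsigned char value, so *COMMON_TYPE simply becomes signed short.
   If instead the types are signed char and unsigned char, neither contains
   the other, so a 16-bit signed type is built; that only succeeds if TYPE is
   at least twice as wide again (32 bits or more).  */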
513 /* Check whether STMT_INFO can be viewed as a tree of integer operations
514 in which each node either performs CODE or WIDENED_CODE, and where
515 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
516 specifies the maximum number of leaf operands. SHIFT_P says whether
517 CODE and WIDENED_CODE are some sort of shift.
519 If STMT_INFO is such a tree, return the number of leaf operands
520 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
521 to a type that (a) is narrower than the result of STMT_INFO and
522 (b) can hold all leaf operand values.
524 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
525 exists. */
527 static unsigned int
528 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
529 tree_code widened_code, bool shift_p,
530 unsigned int max_nops,
531 vect_unpromoted_value *unprom, tree *common_type)
533 /* Check for an integer operation with the right code. */
534 vec_info *vinfo = stmt_info->vinfo;
535 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
536 if (!assign)
537 return 0;
539 tree_code rhs_code = gimple_assign_rhs_code (assign);
540 if (rhs_code != code && rhs_code != widened_code)
541 return 0;
543 tree type = gimple_expr_type (assign);
544 if (!INTEGRAL_TYPE_P (type))
545 return 0;
547 /* Assume that both operands will be leaf operands. */
548 max_nops -= 2;
550 /* Check the operands. */
551 unsigned int next_op = 0;
552 for (unsigned int i = 0; i < 2; ++i)
554 vect_unpromoted_value *this_unprom = &unprom[next_op];
555 unsigned int nops = 1;
556 tree op = gimple_op (assign, i + 1);
557 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
559 /* We already have a common type from earlier operands.
560 Update it to account for OP. */
561 this_unprom->set_op (op, vect_constant_def);
562 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
563 return 0;
565 else
567 /* Only allow shifts by constants. */
568 if (shift_p && i == 1)
569 return 0;
571 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
572 this_unprom))
573 return 0;
575 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
577 /* The operand isn't widened. If STMT_INFO has the code
578 for an unwidened operation, recursively check whether
579 this operand is a node of the tree. */
580 if (rhs_code != code
581 || max_nops == 0
582 || this_unprom->dt != vect_internal_def)
583 return 0;
585 /* Give back the leaf slot allocated above now that we're
586 not treating this as a leaf operand. */
587 max_nops += 1;
589 /* Recursively process the definition of the operand. */
590 stmt_vec_info def_stmt_info
591 = vinfo->lookup_def (this_unprom->op);
592 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
593 shift_p, max_nops, this_unprom,
594 common_type);
595 if (nops == 0)
596 return 0;
598 max_nops -= nops;
600 else
602 /* Make sure that the operand is narrower than the result. */
603 if (TYPE_PRECISION (this_unprom->type) * 2
604 > TYPE_PRECISION (type))
605 return 0;
607 /* Update COMMON_TYPE for the new operand. */
608 if (i == 0)
609 *common_type = this_unprom->type;
610 else if (!vect_joust_widened_type (type, this_unprom->type,
611 common_type))
612 return 0;
615 next_op += nops;
617 return next_op;
620 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
621 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
623 static tree
624 vect_recog_temp_ssa_var (tree type, gimple *stmt)
626 return make_temp_ssa_name (type, stmt, "patt");
629 /* STMT2_INFO describes a type conversion that could be split into STMT1
630 followed by a version of STMT2_INFO that takes NEW_RHS as its first
631 input. Try to do this using pattern statements, returning true on
632 success. */
634 static bool
635 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
636 gimple *stmt1, tree vectype)
638 vec_info *vinfo = stmt2_info->vinfo;
639 if (is_pattern_stmt_p (stmt2_info))
641 /* STMT2_INFO is part of a pattern. Get the statement to which
642 the pattern is attached. */
643 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
644 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
646 if (dump_enabled_p ())
647 dump_printf_loc (MSG_NOTE, vect_location,
648 "Splitting pattern statement: %G", stmt2_info->stmt);
650 /* Since STMT2_INFO is a pattern statement, we can change it
651 in-situ without worrying about changing the code for the
652 containing block. */
653 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
655 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
658 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
659 stmt2_info->stmt);
662 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
663 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
664 /* STMT2_INFO is the actual pattern statement. Add STMT1
665 to the end of the definition sequence. */
666 gimple_seq_add_stmt_without_update (def_seq, stmt1);
667 else
669 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
670 before it. */
671 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
672 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
674 return true;
676 else
678 /* STMT2_INFO doesn't yet have a pattern. Try to create a
679 two-statement pattern now. */
680 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
681 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
682 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
683 if (!lhs_vectype)
684 return false;
686 if (dump_enabled_p ())
687 dump_printf_loc (MSG_NOTE, vect_location,
688 "Splitting statement: %G", stmt2_info->stmt);
690 /* Add STMT1 as a singleton pattern definition sequence. */
691 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
692 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
693 gimple_seq_add_stmt_without_update (def_seq, stmt1);
695 /* Build the second of the two pattern statements. */
696 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
697 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
698 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
700 if (dump_enabled_p ())
702 dump_printf_loc (MSG_NOTE, vect_location,
703 "into pattern statements: %G", stmt1);
704 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
707 return true;
711 /* Convert UNPROM to TYPE and return the result, adding new statements
712 to STMT_INFO's pattern definition statements if no better way is
713 available. VECTYPE is the vector form of TYPE. */
715 static tree
716 vect_convert_input (stmt_vec_info stmt_info, tree type,
717 vect_unpromoted_value *unprom, tree vectype)
719 vec_info *vinfo = stmt_info->vinfo;
721 /* Check for a no-op conversion. */
722 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
723 return unprom->op;
725 /* Allow the caller to create constant vect_unpromoted_values. */
726 if (TREE_CODE (unprom->op) == INTEGER_CST)
727 return wide_int_to_tree (type, wi::to_widest (unprom->op));
729 tree input = unprom->op;
730 if (unprom->caster)
732 tree lhs = gimple_get_lhs (unprom->caster->stmt);
733 tree lhs_type = TREE_TYPE (lhs);
735 /* If the result of the existing cast is the right width, use it
736 instead of the source of the cast. */
737 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
738 input = lhs;
739 /* If the precision we want is between the source and result
740 precisions of the existing cast, try splitting the cast into
741 two and tapping into a mid-way point. */
742 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
743 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
745 /* In order to preserve the semantics of the original cast,
746 give the mid-way point the same signedness as the input value.
748 It would be possible to use a signed type here instead if
749 TYPE is signed and UNPROM->TYPE is unsigned, but that would
750 make the sign of the midtype sensitive to the order in
751 which we process the statements, since the signedness of
752 TYPE is the signedness required by just one of possibly
753 many users. Also, unsigned promotions are usually as cheap
754 as or cheaper than signed ones, so it's better to keep an
755 unsigned promotion. */
756 tree midtype = build_nonstandard_integer_type
757 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
758 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
759 if (vec_midtype)
761 input = vect_recog_temp_ssa_var (midtype, NULL);
762 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
763 unprom->op);
764 if (!vect_split_statement (unprom->caster, input, new_stmt,
765 vec_midtype))
766 append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
770 /* See if we can reuse an existing result. */
771 if (types_compatible_p (type, TREE_TYPE (input)))
772 return input;
775 /* We need a new conversion statement. */
776 tree new_op = vect_recog_temp_ssa_var (type, NULL);
777 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
779 /* If OP is an external value, see if we can insert the new statement
780 on an incoming edge. */
781 if (input == unprom->op && unprom->dt == vect_external_def)
782 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
784 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
785 gcc_assert (!new_bb);
786 return new_op;
789 /* As a (common) last resort, add the statement to the pattern itself. */
790 append_pattern_def_seq (stmt_info, new_stmt, vectype);
791 return new_op;
794 /* Invoke vect_convert_input for N elements of UNPROM and store the
795 result in the corresponding elements of RESULT. */
797 static void
798 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
799 tree *result, tree type, vect_unpromoted_value *unprom,
800 tree vectype)
802 for (unsigned int i = 0; i < n; ++i)
804 unsigned int j;
805 for (j = 0; j < i; ++j)
806 if (unprom[j].op == unprom[i].op)
807 break;
808 if (j < i)
809 result[i] = result[j];
810 else
811 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
815 /* The caller has created a (possibly empty) sequence of pattern definition
816 statements followed by a single statement PATTERN_STMT. Cast the result
817 of this final statement to TYPE. If a new statement is needed, add
818 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
819 and return the new statement, otherwise return PATTERN_STMT as-is.
820 VECITYPE is the vector form of PATTERN_STMT's result type. */
822 static gimple *
823 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
824 tree vecitype)
826 tree lhs = gimple_get_lhs (pattern_stmt);
827 if (!types_compatible_p (type, TREE_TYPE (lhs)))
829 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
830 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
831 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
833 return pattern_stmt;
 836 /* Return true if STMT_INFO describes a reduction for which reassociation
837 is allowed. If STMT_INFO is part of a group, assume that it's part of
838 a reduction chain and optimistically assume that all statements
839 except the last allow reassociation.
840 Also require it to have code CODE and to be a reduction
841 in the outermost loop. When returning true, store the operands in
842 *OP0_OUT and *OP1_OUT. */
844 static bool
845 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
846 tree *op0_out, tree *op1_out)
848 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
849 if (!loop_info)
850 return false;
852 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
853 if (!assign || gimple_assign_rhs_code (assign) != code)
854 return false;
856 /* We don't allow changing the order of the computation in the inner-loop
857 when doing outer-loop vectorization. */
858 class loop *loop = LOOP_VINFO_LOOP (loop_info);
859 if (loop && nested_in_vect_loop_p (loop, stmt_info))
860 return false;
862 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
864 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
865 code))
866 return false;
868 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
869 return false;
871 *op0_out = gimple_assign_rhs1 (assign);
872 *op1_out = gimple_assign_rhs2 (assign);
873 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
874 std::swap (*op0_out, *op1_out);
875 return true;
878 /* Function vect_recog_dot_prod_pattern
880 Try to find the following pattern:
882 type x_t, y_t;
883 TYPE1 prod;
884 TYPE2 sum = init;
885 loop:
886 sum_0 = phi <init, sum_1>
887 S1 x_t = ...
888 S2 y_t = ...
889 S3 x_T = (TYPE1) x_t;
890 S4 y_T = (TYPE1) y_t;
891 S5 prod = x_T * y_T;
892 [S6 prod = (TYPE2) prod; #optional]
893 S7 sum_1 = prod + sum_0;
895 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
 896 same size as 'TYPE1' or bigger. This is a special case of a reduction
897 computation.
899 Input:
901 * STMT_VINFO: The stmt from which the pattern search begins. In the
902 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
903 will be detected.
905 Output:
907 * TYPE_OUT: The type of the output of this pattern.
909 * Return value: A new stmt that will be used to replace the sequence of
910 stmts that constitute the pattern. In this case it will be:
911 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
913 Note: The dot-prod idiom is a widening reduction pattern that is
914 vectorized without preserving all the intermediate results. It
915 produces only N/2 (widened) results (by summing up pairs of
916 intermediate results) rather than all N results. Therefore, we
917 cannot allow this pattern when we want to get all the results and in
918 the correct order (as is the case when this computation is in an
 919 inner-loop nested in an outer-loop that is being vectorized). */
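/* An illustrative source loop that this pattern aims to catch (names and
   types are made up for the example):

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i] * y[i];    // both operands are promoted to int (S3-S5)

   Here 'type' is short and TYPE1/TYPE2 are int, so on targets with a vector
   dot-product instruction the multiply and the accumulation collapse into a
   single DOT_PROD_EXPR <x_t, y_t, sum_0> per vector iteration.  */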
921 static gimple *
922 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
924 tree oprnd0, oprnd1;
925 gimple *last_stmt = stmt_vinfo->stmt;
926 vec_info *vinfo = stmt_vinfo->vinfo;
927 tree type, half_type;
928 gimple *pattern_stmt;
929 tree var;
931 /* Look for the following pattern
932 DX = (TYPE1) X;
933 DY = (TYPE1) Y;
934 DPROD = DX * DY;
935 DDPROD = (TYPE2) DPROD;
936 sum_1 = DDPROD + sum_0;
937 In which
938 - DX is double the size of X
939 - DY is double the size of Y
940 - DX, DY, DPROD all have the same type
 941 - sum is the same size as DPROD or bigger
942 - sum has been recognized as a reduction variable.
944 This is equivalent to:
945 DPROD = X w* Y; #widen mult
946 sum_1 = DPROD w+ sum_0; #widen summation
948 DPROD = X w* Y; #widen mult
949 sum_1 = DPROD + sum_0; #summation
952 /* Starting from LAST_STMT, follow the defs of its uses in search
953 of the above pattern. */
955 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
956 &oprnd0, &oprnd1))
957 return NULL;
959 type = gimple_expr_type (last_stmt);
961 vect_unpromoted_value unprom_mult;
962 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
964 /* So far so good. Since last_stmt was detected as a (summation) reduction,
965 we know that oprnd1 is the reduction variable (defined by a loop-header
966 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
 967 Left to check that oprnd0 is defined by a (widen_)mult_expr */
968 if (!oprnd0)
969 return NULL;
971 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
972 if (!mult_vinfo)
973 return NULL;
 975 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
976 inside the loop (in case we are analyzing an outer-loop). */
977 vect_unpromoted_value unprom0[2];
978 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
979 false, 2, unprom0, &half_type))
980 return NULL;
982 /* If there are two widening operations, make sure they agree on
983 the sign of the extension. */
984 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
985 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
986 return NULL;
988 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
990 tree half_vectype;
991 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
992 type_out, &half_vectype))
993 return NULL;
995 /* Get the inputs in the appropriate types. */
996 tree mult_oprnd[2];
997 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
998 unprom0, half_vectype);
1000 var = vect_recog_temp_ssa_var (type, NULL);
1001 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1002 mult_oprnd[0], mult_oprnd[1], oprnd1);
1004 return pattern_stmt;
1008 /* Function vect_recog_sad_pattern
1010 Try to find the following Sum of Absolute Difference (SAD) pattern:
1012 type x_t, y_t;
1013 signed TYPE1 diff, abs_diff;
1014 TYPE2 sum = init;
1015 loop:
1016 sum_0 = phi <init, sum_1>
1017 S1 x_t = ...
1018 S2 y_t = ...
1019 S3 x_T = (TYPE1) x_t;
1020 S4 y_T = (TYPE1) y_t;
1021 S5 diff = x_T - y_T;
1022 S6 abs_diff = ABS_EXPR <diff>;
1023 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1024 S8 sum_1 = abs_diff + sum_0;
1026 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
 1027 same size as 'TYPE1' or bigger. This is a special case of a reduction
1028 computation.
1030 Input:
1032 * STMT_VINFO: The stmt from which the pattern search begins. In the
1033 example, when this function is called with S8, the pattern
1034 {S3,S4,S5,S6,S7,S8} will be detected.
1036 Output:
1038 * TYPE_OUT: The type of the output of this pattern.
1040 * Return value: A new stmt that will be used to replace the sequence of
1041 stmts that constitute the pattern. In this case it will be:
1042 SAD_EXPR <x_t, y_t, sum_0>
1045 static gimple *
1046 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1048 gimple *last_stmt = stmt_vinfo->stmt;
1049 vec_info *vinfo = stmt_vinfo->vinfo;
1050 tree half_type;
1052 /* Look for the following pattern
1053 DX = (TYPE1) X;
1054 DY = (TYPE1) Y;
1055 DDIFF = DX - DY;
1056 DAD = ABS_EXPR <DDIFF>;
 1057 [DAD = (TYPE2) DAD;   #optional]
1058 sum_1 = DAD + sum_0;
1059 In which
1060 - DX is at least double the size of X
1061 - DY is at least double the size of Y
1062 - DX, DY, DDIFF, DAD all have the same type
 1063 - sum is the same size as DAD or bigger
1064 - sum has been recognized as a reduction variable.
1066 This is equivalent to:
1067 DDIFF = X w- Y; #widen sub
1068 DAD = ABS_EXPR <DDIFF>;
1069 sum_1 = DAD w+ sum_0; #widen summation
1071 DDIFF = X w- Y; #widen sub
1072 DAD = ABS_EXPR <DDIFF>;
1073 sum_1 = DAD + sum_0; #summation
1076 /* Starting from LAST_STMT, follow the defs of its uses in search
1077 of the above pattern. */
1079 tree plus_oprnd0, plus_oprnd1;
1080 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1081 &plus_oprnd0, &plus_oprnd1))
1082 return NULL;
1084 tree sum_type = gimple_expr_type (last_stmt);
1086 /* Any non-truncating sequence of conversions is OK here, since
1087 with a successful match, the result of the ABS(U) is known to fit
1088 within the nonnegative range of the result type. (It cannot be the
1089 negative of the minimum signed value due to the range of the widening
1090 MINUS_EXPR.) */
1091 vect_unpromoted_value unprom_abs;
1092 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1093 &unprom_abs);
1095 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1096 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1097 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1098 Then check that plus_oprnd0 is defined by an abs_expr. */
1100 if (!plus_oprnd0)
1101 return NULL;
1103 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1104 if (!abs_stmt_vinfo)
1105 return NULL;
 1107 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1108 inside the loop (in case we are analyzing an outer-loop). */
1109 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1110 if (!abs_stmt
1111 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1112 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1113 return NULL;
1115 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1116 tree abs_type = TREE_TYPE (abs_oprnd);
1117 if (TYPE_UNSIGNED (abs_type))
1118 return NULL;
1120 /* Peel off conversions from the ABS input. This can involve sign
1121 changes (e.g. from an unsigned subtraction to a signed ABS input)
1122 or signed promotion, but it can't include unsigned promotion.
1123 (Note that ABS of an unsigned promotion should have been folded
1124 away before now anyway.) */
1125 vect_unpromoted_value unprom_diff;
1126 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1127 &unprom_diff);
1128 if (!abs_oprnd)
1129 return NULL;
1130 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1131 && TYPE_UNSIGNED (unprom_diff.type))
1132 return NULL;
1134 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1135 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1136 if (!diff_stmt_vinfo)
1137 return NULL;
 1139 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1140 inside the loop (in case we are analyzing an outer-loop). */
1141 vect_unpromoted_value unprom[2];
1142 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1143 false, 2, unprom, &half_type))
1144 return NULL;
1146 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1148 tree half_vectype;
1149 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1150 type_out, &half_vectype))
1151 return NULL;
1153 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1154 tree sad_oprnd[2];
1155 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1156 unprom, half_vectype);
1158 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1159 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1160 sad_oprnd[1], plus_oprnd1);
1162 return pattern_stmt;
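/* An illustrative source loop for the SAD pattern above (names and types are
   made up for the example; this is the usual motion-estimation kernel):

     unsigned char x[N], y[N];
     int sad = 0;
     for (int i = 0; i < N; ++i)
       sad += __builtin_abs (x[i] - y[i]);   // subtraction and ABS done in int

   On targets with a sum-of-absolute-differences instruction this becomes a
   single SAD_EXPR <x_t, y_t, sad_0> per vector iteration.  */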
1165 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1166 so that it can be treated as though it had the form:
1168 A_TYPE a;
1169 B_TYPE b;
1170 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1171 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1172 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1173 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1174 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1176 Try to replace the pattern with:
1178 A_TYPE a;
1179 B_TYPE b;
1180 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1181 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1182 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1183 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1185 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1187 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1188 name of the pattern being matched, for dump purposes. */
1190 static gimple *
1191 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1192 tree_code orig_code, tree_code wide_code,
1193 bool shift_p, const char *name)
1195 vec_info *vinfo = last_stmt_info->vinfo;
1196 gimple *last_stmt = last_stmt_info->stmt;
1198 vect_unpromoted_value unprom[2];
1199 tree half_type;
1200 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1201 shift_p, 2, unprom, &half_type))
1202 return NULL;
1204 /* Pattern detected. */
1205 vect_pattern_detected (name, last_stmt);
1207 tree type = gimple_expr_type (last_stmt);
1208 tree itype = type;
1209 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1210 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1211 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1212 TYPE_UNSIGNED (half_type));
1214 /* Check target support */
1215 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1216 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1217 enum tree_code dummy_code;
1218 int dummy_int;
1219 auto_vec<tree> dummy_vec;
1220 if (!vectype
1221 || !vecitype
1222 || !supportable_widening_operation (wide_code, last_stmt_info,
1223 vecitype, vectype,
1224 &dummy_code, &dummy_code,
1225 &dummy_int, &dummy_vec))
1226 return NULL;
1228 *type_out = get_vectype_for_scalar_type (vinfo, type);
1229 if (!*type_out)
1230 return NULL;
1232 tree oprnd[2];
1233 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1235 tree var = vect_recog_temp_ssa_var (itype, NULL);
1236 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1237 oprnd[0], oprnd[1]);
1239 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1242 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1243 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1245 static gimple *
1246 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1248 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1249 WIDEN_MULT_EXPR, false,
1250 "vect_recog_widen_mult_pattern");
1253 /* Function vect_recog_pow_pattern
1255 Try to find the following pattern:
1257 x = POW (y, N);
1259 with POW being one of pow, powf, powi, powif and N being
1260 either 2 or 0.5.
1262 Input:
1264 * STMT_VINFO: The stmt from which the pattern search begins.
1266 Output:
1268 * TYPE_OUT: The type of the output of this pattern.
1270 * Return value: A new stmt that will be used to replace the sequence of
1271 stmts that constitute the pattern. In this case it will be:
1272 x = x * x
1274 x = sqrt (x)
1277 static gimple *
1278 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1280 vec_info *vinfo = stmt_vinfo->vinfo;
1281 gimple *last_stmt = stmt_vinfo->stmt;
1282 tree base, exp;
1283 gimple *stmt;
1284 tree var;
1286 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1287 return NULL;
1289 switch (gimple_call_combined_fn (last_stmt))
1291 CASE_CFN_POW:
1292 CASE_CFN_POWI:
1293 break;
1295 default:
1296 return NULL;
1299 base = gimple_call_arg (last_stmt, 0);
1300 exp = gimple_call_arg (last_stmt, 1);
1301 if (TREE_CODE (exp) != REAL_CST
1302 && TREE_CODE (exp) != INTEGER_CST)
1304 if (flag_unsafe_math_optimizations
1305 && TREE_CODE (base) == REAL_CST
1306 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1308 combined_fn log_cfn;
1309 built_in_function exp_bfn;
1310 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1312 case BUILT_IN_POW:
1313 log_cfn = CFN_BUILT_IN_LOG;
1314 exp_bfn = BUILT_IN_EXP;
1315 break;
1316 case BUILT_IN_POWF:
1317 log_cfn = CFN_BUILT_IN_LOGF;
1318 exp_bfn = BUILT_IN_EXPF;
1319 break;
1320 case BUILT_IN_POWL:
1321 log_cfn = CFN_BUILT_IN_LOGL;
1322 exp_bfn = BUILT_IN_EXPL;
1323 break;
1324 default:
1325 return NULL;
1327 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1328 tree exp_decl = builtin_decl_implicit (exp_bfn);
1329 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1330 does that, but if C is a power of 2, we want to use
1331 exp2 (log2 (C) * x) in the non-vectorized version, but for
1332 vectorization we don't have vectorized exp2. */
1333 if (logc
1334 && TREE_CODE (logc) == REAL_CST
1335 && exp_decl
1336 && lookup_attribute ("omp declare simd",
1337 DECL_ATTRIBUTES (exp_decl)))
1339 cgraph_node *node = cgraph_node::get_create (exp_decl);
1340 if (node->simd_clones == NULL)
1342 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1343 || node->definition)
1344 return NULL;
1345 expand_simd_clones (node);
1346 if (node->simd_clones == NULL)
1347 return NULL;
1349 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1350 if (!*type_out)
1351 return NULL;
1352 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1353 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1354 append_pattern_def_seq (stmt_vinfo, g);
1355 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1356 g = gimple_build_call (exp_decl, 1, def);
1357 gimple_call_set_lhs (g, res);
1358 return g;
1362 return NULL;
1365 /* We now have a pow or powi builtin function call with a constant
1366 exponent. */
1368 /* Catch squaring. */
1369 if ((tree_fits_shwi_p (exp)
1370 && tree_to_shwi (exp) == 2)
1371 || (TREE_CODE (exp) == REAL_CST
1372 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1374 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1375 TREE_TYPE (base), type_out))
1376 return NULL;
1378 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1379 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1380 return stmt;
1383 /* Catch square root. */
1384 if (TREE_CODE (exp) == REAL_CST
1385 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1387 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1388 if (*type_out
1389 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1390 OPTIMIZE_FOR_SPEED))
1392 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1393 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1394 gimple_call_set_lhs (stmt, var);
1395 gimple_call_set_nothrow (stmt, true);
1396 return stmt;
1400 return NULL;
1404 /* Function vect_recog_widen_sum_pattern
1406 Try to find the following pattern:
1408 type x_t;
1409 TYPE x_T, sum = init;
1410 loop:
1411 sum_0 = phi <init, sum_1>
1412 S1 x_t = *p;
1413 S2 x_T = (TYPE) x_t;
1414 S3 sum_1 = x_T + sum_0;
 1416 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1417 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1418 a special case of a reduction computation.
1420 Input:
1422 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1423 when this function is called with S3, the pattern {S2,S3} will be detected.
1425 Output:
1427 * TYPE_OUT: The type of the output of this pattern.
1429 * Return value: A new stmt that will be used to replace the sequence of
1430 stmts that constitute the pattern. In this case it will be:
1431 WIDEN_SUM <x_t, sum_0>
1433 Note: The widening-sum idiom is a widening reduction pattern that is
1434 vectorized without preserving all the intermediate results. It
1435 produces only N/2 (widened) results (by summing up pairs of
1436 intermediate results) rather than all N results. Therefore, we
1437 cannot allow this pattern when we want to get all the results and in
1438 the correct order (as is the case when this computation is in an
 1439 inner-loop nested in an outer-loop that is being vectorized). */
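/* An illustrative loop for this pattern (names made up for the example):

     unsigned char x[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i];    // x[i] is widened before the addition

   On targets with a widening add-reduction this becomes
   WIDEN_SUM_EXPR <x_t, sum_0> instead of unpacking every element to the
   accumulator width first.  */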
1441 static gimple *
1442 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1444 gimple *last_stmt = stmt_vinfo->stmt;
1445 tree oprnd0, oprnd1;
1446 vec_info *vinfo = stmt_vinfo->vinfo;
1447 tree type;
1448 gimple *pattern_stmt;
1449 tree var;
1451 /* Look for the following pattern
1452 DX = (TYPE) X;
1453 sum_1 = DX + sum_0;
1454 In which DX is at least double the size of X, and sum_1 has been
1455 recognized as a reduction variable.
1458 /* Starting from LAST_STMT, follow the defs of its uses in search
1459 of the above pattern. */
1461 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1462 &oprnd0, &oprnd1))
1463 return NULL;
1465 type = gimple_expr_type (last_stmt);
1467 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1468 we know that oprnd1 is the reduction variable (defined by a loop-header
1469 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1470 Left to check that oprnd0 is defined by a cast from type 'type' to type
1471 'TYPE'. */
1473 vect_unpromoted_value unprom0;
1474 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1475 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1476 return NULL;
1478 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1480 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1481 unprom0.type, type_out))
1482 return NULL;
1484 var = vect_recog_temp_ssa_var (type, NULL);
1485 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1487 return pattern_stmt;
1490 /* Recognize cases in which an operation is performed in one type WTYPE
1491 but could be done more efficiently in a narrower type NTYPE. For example,
1492 if we have:
1494 ATYPE a; // narrower than NTYPE
1495 BTYPE b; // narrower than NTYPE
1496 WTYPE aw = (WTYPE) a;
1497 WTYPE bw = (WTYPE) b;
1498 WTYPE res = aw + bw; // only uses of aw and bw
1500 then it would be more efficient to do:
1502 NTYPE an = (NTYPE) a;
1503 NTYPE bn = (NTYPE) b;
1504 NTYPE resn = an + bn;
1505 WTYPE res = (WTYPE) resn;
1507 Other situations include things like:
1509 ATYPE a; // NTYPE or narrower
1510 WTYPE aw = (WTYPE) a;
1511 WTYPE res = aw + b;
1513 when only "(NTYPE) res" is significant. In that case it's more efficient
1514 to truncate "b" and do the operation on NTYPE instead:
1516 NTYPE an = (NTYPE) a;
1517 NTYPE bn = (NTYPE) b; // truncation
1518 NTYPE resn = an + bn;
1519 WTYPE res = (WTYPE) resn;
1521 All users of "res" should then use "resn" instead, making the final
1522 statement dead (not marked as relevant). The final statement is still
1523 needed to maintain the type correctness of the IR.
1525 vect_determine_precisions has already determined the minimum
 1526 precision of the operation and the minimum precision required
1527 by users of the result. */
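/* An illustrative case (made up for the example):

     unsigned char a[N], b[N], c[N];
     for (int i = 0; i < N; ++i)
       c[i] = (a[i] + b[i] + 1) >> 1;

   C promotes the arithmetic to int, but the intermediate value needs at most
   9 bits and only 8 bits of the result are stored, so the whole computation
   can be carried out on a 16-bit element type, roughly halving the number of
   vector operations needed.  */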
1529 static gimple *
1530 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1532 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1533 if (!last_stmt)
1534 return NULL;
1536 /* See whether we have found that this operation can be done on a
1537 narrower type without changing its semantics. */
1538 unsigned int new_precision = last_stmt_info->operation_precision;
1539 if (!new_precision)
1540 return NULL;
1542 vec_info *vinfo = last_stmt_info->vinfo;
1543 tree lhs = gimple_assign_lhs (last_stmt);
1544 tree type = TREE_TYPE (lhs);
1545 tree_code code = gimple_assign_rhs_code (last_stmt);
1547 /* Keep the first operand of a COND_EXPR as-is: only the other two
1548 operands are interesting. */
1549 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1551 /* Check the operands. */
1552 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1553 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1554 unprom.quick_grow (nops);
1555 unsigned int min_precision = 0;
1556 bool single_use_p = false;
1557 for (unsigned int i = 0; i < nops; ++i)
1559 tree op = gimple_op (last_stmt, first_op + i);
1560 if (TREE_CODE (op) == INTEGER_CST)
1561 unprom[i].set_op (op, vect_constant_def);
1562 else if (TREE_CODE (op) == SSA_NAME)
1564 bool op_single_use_p = true;
1565 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1566 &op_single_use_p))
1567 return NULL;
1568 /* If:
1570 (1) N bits of the result are needed;
1571 (2) all inputs are widened from M<N bits; and
1572 (3) one operand OP is a single-use SSA name
1574 we can shift the M->N widening from OP to the output
1575 without changing the number or type of extensions involved.
1576 This then reduces the number of copies of STMT_INFO.
1578 If instead of (3) more than one operand is a single-use SSA name,
1579 shifting the extension to the output is even more of a win.
1581 If instead:
1583 (1) N bits of the result are needed;
1584 (2) one operand OP2 is widened from M2<N bits;
1585 (3) another operand OP1 is widened from M1<M2 bits; and
1586 (4) both OP1 and OP2 are single-use
1588 the choice is between:
1590 (a) truncating OP2 to M1, doing the operation on M1,
1591 and then widening the result to N
1593 (b) widening OP1 to M2, doing the operation on M2, and then
1594 widening the result to N
1596 Both shift the M2->N widening of the inputs to the output.
1597 (a) additionally shifts the M1->M2 widening to the output;
1598 it requires fewer copies of STMT_INFO but requires an extra
1599 M2->M1 truncation.
1601 Which is better will depend on the complexity and cost of
1602 STMT_INFO, which is hard to predict at this stage. However,
1603 a clear tie-breaker in favor of (b) is the fact that the
1604 truncation in (a) increases the length of the operation chain.
1606 If instead of (4) only one of OP1 or OP2 is single-use,
1607 (b) is still a win over doing the operation in N bits:
1608 it still shifts the M2->N widening on the single-use operand
1609 to the output and reduces the number of STMT_INFO copies.
1611 If neither operand is single-use then operating on fewer than
1612 N bits might lead to more extensions overall. Whether it does
1613 or not depends on global information about the vectorization
1614 region, and whether that's a good trade-off would again
1615 depend on the complexity and cost of the statements involved,
1616 as well as things like register pressure that are not normally
1617 modelled at this stage. We therefore ignore these cases
1618 and just optimize the clear single-use wins above.
1620 Thus we take the maximum precision of the unpromoted operands
1621 and record whether any operand is single-use. */
1622 if (unprom[i].dt == vect_internal_def)
1624 min_precision = MAX (min_precision,
1625 TYPE_PRECISION (unprom[i].type));
1626 single_use_p |= op_single_use_p;
1631 /* Although the operation could be done in operation_precision, we have
1632 to balance that against introducing extra truncations or extensions.
1633 Calculate the minimum precision that can be handled efficiently.
1635 The loop above determined that the operation could be handled
1636 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1637 extension from the inputs to the output without introducing more
1638 instructions, and would reduce the number of instructions required
1639 for STMT_INFO itself.
1641 vect_determine_precisions has also determined that the result only
1642 needs min_output_precision bits. Truncating by a factor of N times
1643 requires a tree of N - 1 instructions, so if TYPE is N times wider
1644 than min_output_precision, doing the operation in TYPE and truncating
1645 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1646 In contrast:
1648 - truncating the input to a unary operation and doing the operation
1649 in the new type requires at most N - 1 + 1 = N instructions per
1650 output vector
1652 - doing the same for a binary operation requires at most
1653 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1655 Both unary and binary operations require fewer instructions than
1656 this if the operands were extended from a suitable truncated form.
1657 Thus there is usually nothing to lose by doing operations in
1658 min_output_precision bits, but there can be something to gain. */
1659 if (!single_use_p)
1660 min_precision = last_stmt_info->min_output_precision;
1661 else
1662 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1664 /* Apply the minimum efficient precision we just calculated. */
1665 if (new_precision < min_precision)
1666 new_precision = min_precision;
1667 if (new_precision >= TYPE_PRECISION (type))
1668 return NULL;
1670 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1672 *type_out = get_vectype_for_scalar_type (vinfo, type);
1673 if (!*type_out)
1674 return NULL;
1676 /* We've found a viable pattern. Get the new type of the operation. */
1677 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1678 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1680 /* If we're truncating an operation, we need to make sure that we
1681 don't introduce new undefined overflow. The codes tested here are
1682 a subset of those accepted by vect_truncatable_operation_p. */
1683 tree op_type = new_type;
1684 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1685 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1686 op_type = build_nonstandard_integer_type (new_precision, true);
1688 /* We specifically don't check here whether the target supports the
1689 new operation, since it might be something that a later pattern
1690 wants to rewrite anyway. If targets have a minimum element size
1691 for some optabs, we should pattern-match smaller ops to larger ops
1692 where beneficial. */
1693 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1694 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1695 if (!new_vectype || !op_vectype)
1696 return NULL;
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1700 type, new_type);
1702 /* Calculate the rhs operands for an operation on OP_TYPE. */
1703 tree ops[3] = {};
1704 for (unsigned int i = 1; i < first_op; ++i)
1705 ops[i - 1] = gimple_op (last_stmt, i);
1706 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1707 op_type, &unprom[0], op_vectype);
1709 /* Use the operation to produce a result of type OP_TYPE. */
1710 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1711 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1712 ops[0], ops[1], ops[2]);
1713 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1715 if (dump_enabled_p ())
1716 dump_printf_loc (MSG_NOTE, vect_location,
1717 "created pattern stmt: %G", pattern_stmt);
1719 /* Convert back to the original signedness, if OP_TYPE is different
1720 from NEW_TYPE. */
1721 if (op_type != new_type)
1722 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1723 pattern_stmt, op_vectype);
1725 /* Promote the result to the original type. */
1726 pattern_stmt = vect_convert_output (last_stmt_info, type,
1727 pattern_stmt, new_vectype);
1729 return pattern_stmt;
1732 /* Recognize the following patterns:
1734 ATYPE a; // narrower than TYPE
1735 BTYPE b; // narrower than TYPE
1737 1) Multiply high with scaling
1738 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1739 2) ... or also with rounding
1740 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1742 where only the bottom half of res is used. */
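/* For instance, assuming the usual 16-bit short and 32-bit int, a loop of
   roughly this shape (function and array names are purely illustrative)
   matches form 2 and can use IFN_MULHRS where the target provides it:

     void
     f (short *res, const short *a, const short *b, int n)
     {
       for (int i = 0; i < n; i++)
	 res[i] = ((((int) a[i] * (int) b[i]) >> 14) + 1) >> 1;
     }

   Only the low 16 bits of the 32-bit result are stored, and 14 + 16 + 2
   equals the 32-bit precision, as required below.  */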
1744 static gimple *
1745 vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1747 /* Check for a right shift. */
1748 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1749 if (!last_stmt
1750 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1751 return NULL;
1752 vec_info *vinfo = last_stmt_info->vinfo;
1754 /* Check that the shift result is wider than the users of the
1755 result need (i.e. that narrowing would be a natural choice). */
1756 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1757 unsigned int target_precision
1758 = vect_element_precision (last_stmt_info->min_output_precision);
1759 if (!INTEGRAL_TYPE_P (lhs_type)
1760 || target_precision >= TYPE_PRECISION (lhs_type))
1761 return NULL;
1763 /* Look through any change in sign on the outer shift input. */
1764 vect_unpromoted_value unprom_rshift_input;
1765 tree rshift_input = vect_look_through_possible_promotion
1766 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1767 if (!rshift_input
1768 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1769 != TYPE_PRECISION (lhs_type))
1770 return NULL;
1772 /* Get the definition of the shift input. */
1773 stmt_vec_info rshift_input_stmt_info
1774 = vect_get_internal_def (vinfo, rshift_input);
1775 if (!rshift_input_stmt_info)
1776 return NULL;
1777 gassign *rshift_input_stmt
1778 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1779 if (!rshift_input_stmt)
1780 return NULL;
1782 stmt_vec_info mulh_stmt_info;
1783 tree scale_term;
1784 internal_fn ifn;
1785 unsigned int expect_offset;
1787 /* Check for the presence of the rounding term. */
1788 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1790 /* Check that the outer shift was by 1. */
1791 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1792 return NULL;
1794 /* Check that the second operand of the PLUS_EXPR is 1. */
1795 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1796 return NULL;
1798 /* Look through any change in sign on the addition input. */
1799 vect_unpromoted_value unprom_plus_input;
1800 tree plus_input = vect_look_through_possible_promotion
1801 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1802 if (!plus_input
1803 || TYPE_PRECISION (TREE_TYPE (plus_input))
1804 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1805 return NULL;
1807 /* Get the definition of the multiply-high-scale part. */
1808 stmt_vec_info plus_input_stmt_info
1809 = vect_get_internal_def (vinfo, plus_input);
1810 if (!plus_input_stmt_info)
1811 return NULL;
1812 gassign *plus_input_stmt
1813 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1814 if (!plus_input_stmt
1815 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1816 return NULL;
1818 /* Look through any change in sign on the scaling input. */
1819 vect_unpromoted_value unprom_scale_input;
1820 tree scale_input = vect_look_through_possible_promotion
1821 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1822 if (!scale_input
1823 || TYPE_PRECISION (TREE_TYPE (scale_input))
1824 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1825 return NULL;
1827 /* Get the definition of the multiply-high part. */
1828 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1829 if (!mulh_stmt_info)
1830 return NULL;
1832 /* Get the scaling term. */
1833 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1835 expect_offset = target_precision + 2;
1836 ifn = IFN_MULHRS;
1838 else
1840 mulh_stmt_info = rshift_input_stmt_info;
1841 scale_term = gimple_assign_rhs2 (last_stmt);
1843 expect_offset = target_precision + 1;
1844 ifn = IFN_MULHS;
1847 /* Check that the scaling factor is correct. */
1848 if (TREE_CODE (scale_term) != INTEGER_CST
1849 || wi::to_widest (scale_term) + expect_offset
1850 != TYPE_PRECISION (lhs_type))
1851 return NULL;
1853 /* Check whether the scaling input term can be seen as two widened
1854 inputs multiplied together. */
1855 vect_unpromoted_value unprom_mult[2];
1856 tree new_type;
1857 unsigned int nops
1858 = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1859 false, 2, unprom_mult, &new_type);
1860 if (nops != 2)
1861 return NULL;
1863 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1865 /* Adjust output precision. */
1866 if (TYPE_PRECISION (new_type) < target_precision)
1867 new_type = build_nonstandard_integer_type
1868 (target_precision, TYPE_UNSIGNED (new_type));
1870 /* Check for target support. */
1871 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1872 if (!new_vectype
1873 || !direct_internal_fn_supported_p
1874 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1875 return NULL;
1877 /* The IR requires a valid vector type for the cast result, even though
1878 it's likely to be discarded. */
1879 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1880 if (!*type_out)
1881 return NULL;
1883 /* Generate the IFN_MULHRS call. */
1884 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1885 tree new_ops[2];
1886 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1887 unprom_mult, new_vectype);
1888 gcall *mulhrs_stmt
1889 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1890 gimple_call_set_lhs (mulhrs_stmt, new_var);
1891 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1893 if (dump_enabled_p ())
1894 dump_printf_loc (MSG_NOTE, vect_location,
1895 "created pattern stmt: %G", mulhrs_stmt);
1897 return vect_convert_output (last_stmt_info, lhs_type,
1898 mulhrs_stmt, new_vectype);
1901 /* Recognize the patterns:
1903 ATYPE a; // narrower than TYPE
1904 BTYPE b; // narrower than TYPE
1905 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1906 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1908 where only the bottom half of avg is used. Try to transform them into:
1910 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1911 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1913 followed by:
1915 TYPE avg = (TYPE) avg';
1917 where NTYPE is no wider than half of TYPE. Since only the bottom half
1918 of avg is used, all or part of the cast of avg' should become redundant. */
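/* The classic instance is a rounding average of two byte arrays; the loop
   below (names illustrative, with unsigned char promoted to int) matches
   form (2) and maps to IFN_AVG_CEIL:

     void
     f (unsigned char *avg, const unsigned char *a,
	const unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
	 avg[i] = (a[i] + b[i] + 1) >> 1;
     }

   Only the low 8 bits of the shift result are stored, so NTYPE can be an
   8-bit type even though the additions are done in int.  */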
1920 static gimple *
1921 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1923 /* Check for a shift right by one bit. */
1924 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1925 vec_info *vinfo = last_stmt_info->vinfo;
1926 if (!last_stmt
1927 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1928 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1929 return NULL;
1931 /* Check that the shift result is wider than the users of the
1932 result need (i.e. that narrowing would be a natural choice). */
1933 tree lhs = gimple_assign_lhs (last_stmt);
1934 tree type = TREE_TYPE (lhs);
1935 unsigned int target_precision
1936 = vect_element_precision (last_stmt_info->min_output_precision);
1937 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1938 return NULL;
1940 /* Look through any change in sign on the shift input. */
1941 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1942 vect_unpromoted_value unprom_plus;
1943 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1944 &unprom_plus);
1945 if (!rshift_rhs
1946 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1947 return NULL;
1949 /* Get the definition of the shift input. */
1950 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1951 if (!plus_stmt_info)
1952 return NULL;
1954 /* Check whether the shift input can be seen as a tree of additions on
1955 2 or 3 widened inputs.
1957 Note that the pattern should be a win even if the result of one or
1958 more additions is reused elsewhere: if the pattern matches, we'd be
1959 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1960 internal_fn ifn = IFN_AVG_FLOOR;
1961 vect_unpromoted_value unprom[3];
1962 tree new_type;
1963 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1964 PLUS_EXPR, false, 3,
1965 unprom, &new_type);
1966 if (nops == 0)
1967 return NULL;
1968 if (nops == 3)
1970 /* Check that one operand is 1. */
1971 unsigned int i;
1972 for (i = 0; i < 3; ++i)
1973 if (integer_onep (unprom[i].op))
1974 break;
1975 if (i == 3)
1976 return NULL;
1977 /* Throw away the 1 operand and keep the other two. */
1978 if (i < 2)
1979 unprom[i] = unprom[2];
1980 ifn = IFN_AVG_CEIL;
1983 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1985 /* We know that:
1987 (a) the operation can be viewed as:
1989 TYPE widened0 = (TYPE) UNPROM[0];
1990 TYPE widened1 = (TYPE) UNPROM[1];
1991 TYPE tmp1 = widened0 + widened1 {+ 1};
1992 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1994 (b) the first two statements are equivalent to:
1996 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1997 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1999 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2000 where sensible;
2002 (d) all the operations can be performed correctly at twice the width of
2003 NEW_TYPE, due to the nature of the average operation; and
2005 (e) users of the result of the right shift need only TARGET_PRECISION
2006 bits, where TARGET_PRECISION is no more than half of TYPE's
2007 precision.
2009 Under these circumstances, the only situation in which NEW_TYPE
2010 could be narrower than TARGET_PRECISION is if widened0, widened1
2011 and an addition result are all used more than once. Thus we can
2012 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2013 as "free", whereas widening the result of the average instruction
2014 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2015 therefore better not to go narrower than TARGET_PRECISION. */
2016 if (TYPE_PRECISION (new_type) < target_precision)
2017 new_type = build_nonstandard_integer_type (target_precision,
2018 TYPE_UNSIGNED (new_type));
2020 /* Check for target support. */
2021 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2022 if (!new_vectype
2023 || !direct_internal_fn_supported_p (ifn, new_vectype,
2024 OPTIMIZE_FOR_SPEED))
2025 return NULL;
2027 /* The IR requires a valid vector type for the cast result, even though
2028 it's likely to be discarded. */
2029 *type_out = get_vectype_for_scalar_type (vinfo, type);
2030 if (!*type_out)
2031 return NULL;
2033 /* Generate the IFN_AVG* call. */
2034 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2035 tree new_ops[2];
2036 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
2037 unprom, new_vectype);
2038 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2039 new_ops[1]);
2040 gimple_call_set_lhs (average_stmt, new_var);
2041 gimple_set_location (average_stmt, gimple_location (last_stmt));
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_NOTE, vect_location,
2045 "created pattern stmt: %G", average_stmt);
2047 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
2050 /* Recognize cases in which the input to a cast is wider than its
2051 output, and the input is fed by a widening operation. Fold this
2052 by removing the unnecessary intermediate widening. E.g.:
2054 unsigned char a;
2055 unsigned int b = (unsigned int) a;
2056 unsigned short c = (unsigned short) b;
2058 -->
2060 unsigned short c = (unsigned short) a;
2062 Although this is rare in input IR, it is an expected side-effect
2063 of the over-widening pattern above.
2065 This is beneficial also for integer-to-float conversions, if the
2066 widened integer has more bits than the float, and if the unwidened
2067 input doesn't. */
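   /* The integer-to-float case looks like this (an illustrative sketch,
      assuming 16-bit short and 32-bit float):

	unsigned short a;
	unsigned long long b = (unsigned long long) a;
	float c = (float) b;
      -->
	float c = (float) a;

      the 64-bit intermediate is wider than float from a vector point of
      view, whereas the 16-bit input is not.  */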
2069 static gimple *
2070 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2072 /* Check for a cast, including an integer-to-float conversion. */
2073 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2074 if (!last_stmt)
2075 return NULL;
2076 tree_code code = gimple_assign_rhs_code (last_stmt);
2077 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2078 return NULL;
2080 /* Make sure that the rhs is a scalar with a natural bitsize. */
2081 tree lhs = gimple_assign_lhs (last_stmt);
2082 if (!lhs)
2083 return NULL;
2084 tree lhs_type = TREE_TYPE (lhs);
2085 scalar_mode lhs_mode;
2086 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2087 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2088 return NULL;
2090 /* Check for a narrowing operation (from a vector point of view). */
2091 tree rhs = gimple_assign_rhs1 (last_stmt);
2092 tree rhs_type = TREE_TYPE (rhs);
2093 if (!INTEGRAL_TYPE_P (rhs_type)
2094 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2095 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2096 return NULL;
2098 /* Try to find an unpromoted input. */
2099 vec_info *vinfo = last_stmt_info->vinfo;
2100 vect_unpromoted_value unprom;
2101 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2102 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2103 return NULL;
2105 /* If the bits above RHS_TYPE matter, make sure that they're the
2106 same when extending from UNPROM as they are when extending from RHS. */
2107 if (!INTEGRAL_TYPE_P (lhs_type)
2108 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2109 return NULL;
2111 /* We can get the same result by casting UNPROM directly, to avoid
2112 the unnecessary widening and narrowing. */
2113 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2115 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2116 if (!*type_out)
2117 return NULL;
2119 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2120 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2121 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2123 return pattern_stmt;
2126 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2127 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
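/* For example (names and the usual 16/32-bit short/int widths are only
   illustrative), the shift of a widened value in

     void
     f (int *out, const short *in, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = (int) in[i] << 7;
     }

   can be recognized as a single widening shift where the target has one.  */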
2129 static gimple *
2130 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2132 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
2133 WIDEN_LSHIFT_EXPR, true,
2134 "vect_recog_widen_shift_pattern");
2137 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
2139 type a_t, b_t, c_t;
2141 S0 a_t = b_t r<< c_t;
2143 Input/Output:
2145 * STMT_VINFO: The stmt from which the pattern search begins,
2146 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2147 with a sequence:
2149 S1 d_t = -c_t;
2150 S2 e_t = d_t & (B - 1);
2151 S3 f_t = b_t << c_t;
2152 S4 g_t = b_t >> e_t;
2153 S0 a_t = f_t | g_t;
2155 where B is the element bitsize of type.
2157 Output:
2159 * TYPE_OUT: The type of the output of this pattern.
2161 * Return value: A new stmt that will be used to replace the rotate
2162 S0 stmt. */
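/* As a concrete instance (purely illustrative), for a 32-bit unsigned
   left-rotate

     a_t = b_t r<< c_t;

   the replacement sequence with B == 32 is

     d_t = -c_t;
     e_t = d_t & 31;
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;  */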
2164 static gimple *
2165 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2167 gimple *last_stmt = stmt_vinfo->stmt;
2168 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2169 gimple *pattern_stmt, *def_stmt;
2170 enum tree_code rhs_code;
2171 vec_info *vinfo = stmt_vinfo->vinfo;
2172 enum vect_def_type dt;
2173 optab optab1, optab2;
2174 edge ext_def = NULL;
2175 bool bswap16_p = false;
2177 if (is_gimple_assign (last_stmt))
2179 rhs_code = gimple_assign_rhs_code (last_stmt);
2180 switch (rhs_code)
2182 case LROTATE_EXPR:
2183 case RROTATE_EXPR:
2184 break;
2185 default:
2186 return NULL;
2189 lhs = gimple_assign_lhs (last_stmt);
2190 oprnd0 = gimple_assign_rhs1 (last_stmt);
2191 type = TREE_TYPE (oprnd0);
2192 oprnd1 = gimple_assign_rhs2 (last_stmt);
2194 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2196 /* __builtin_bswap16 (x) is another form of x r>> 8.
2197 The vectorizer has bswap support, but only if the argument isn't
2198 promoted. */
2199 lhs = gimple_call_lhs (last_stmt);
2200 oprnd0 = gimple_call_arg (last_stmt, 0);
2201 type = TREE_TYPE (oprnd0);
2202 if (!lhs
2203 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2204 || TYPE_PRECISION (type) <= 16
2205 || TREE_CODE (oprnd0) != SSA_NAME
2206 || BITS_PER_UNIT != 8
2207 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2208 return NULL;
2210 stmt_vec_info def_stmt_info;
2211 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2212 return NULL;
2214 if (dt != vect_internal_def)
2215 return NULL;
2217 if (gimple_assign_cast_p (def_stmt))
2219 def = gimple_assign_rhs1 (def_stmt);
2220 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2221 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2222 oprnd0 = def;
2225 type = TREE_TYPE (lhs);
2226 vectype = get_vectype_for_scalar_type (vinfo, type);
2227 if (vectype == NULL_TREE)
2228 return NULL;
2230 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2232 /* The encoding uses one stepped pattern for each byte in the
2233 16-bit word. */
2234 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2235 for (unsigned i = 0; i < 3; ++i)
2236 for (unsigned j = 0; j < 2; ++j)
2237 elts.quick_push ((i + 1) * 2 - j - 1);
2239 vec_perm_indices indices (elts, 1,
2240 TYPE_VECTOR_SUBPARTS (char_vectype));
2241 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2243 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2244 undo the argument promotion. */
2245 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2247 def = vect_recog_temp_ssa_var (type, NULL);
2248 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2249 append_pattern_def_seq (stmt_vinfo, def_stmt);
2250 oprnd0 = def;
2253 /* Pattern detected. */
2254 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2256 *type_out = vectype;
2258 /* Pattern supported. Create a stmt to be used to replace the
2259 pattern, with the unpromoted argument. */
2260 var = vect_recog_temp_ssa_var (type, NULL);
2261 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2262 1, oprnd0);
2263 gimple_call_set_lhs (pattern_stmt, var);
2264 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2265 gimple_call_fntype (last_stmt));
2266 return pattern_stmt;
2270 oprnd1 = build_int_cst (integer_type_node, 8);
2271 rhs_code = LROTATE_EXPR;
2272 bswap16_p = true;
2274 else
2275 return NULL;
2277 if (TREE_CODE (oprnd0) != SSA_NAME
2278 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2279 || !INTEGRAL_TYPE_P (type)
2280 || !TYPE_UNSIGNED (type))
2281 return NULL;
2283 stmt_vec_info def_stmt_info;
2284 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2285 return NULL;
2287 if (dt != vect_internal_def
2288 && dt != vect_constant_def
2289 && dt != vect_external_def)
2290 return NULL;
2292 vectype = get_vectype_for_scalar_type (vinfo, type);
2293 if (vectype == NULL_TREE)
2294 return NULL;
2296 /* If vector/vector or vector/scalar rotate is supported by the target,
2297 don't do anything here. */
2298 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2299 if (optab1
2300 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2302 use_rotate:
2303 if (bswap16_p)
2305 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2307 def = vect_recog_temp_ssa_var (type, NULL);
2308 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2309 append_pattern_def_seq (stmt_vinfo, def_stmt);
2310 oprnd0 = def;
2313 /* Pattern detected. */
2314 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2316 *type_out = vectype;
2318 /* Pattern supported. Create a stmt to be used to replace the
2319 pattern. */
2320 var = vect_recog_temp_ssa_var (type, NULL);
2321 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2322 oprnd1);
2323 return pattern_stmt;
2325 return NULL;
2328 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2330 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2331 if (optab2
2332 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2333 goto use_rotate;
2336 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2337 don't do anything here either. */
2338 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2339 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2340 if (!optab1
2341 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2342 || !optab2
2343 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2345 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2346 return NULL;
2347 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2348 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2349 if (!optab1
2350 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2351 || !optab2
2352 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2353 return NULL;
2356 *type_out = vectype;
2358 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2360 def = vect_recog_temp_ssa_var (type, NULL);
2361 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2362 append_pattern_def_seq (stmt_vinfo, def_stmt);
2363 oprnd0 = def;
2366 if (dt == vect_external_def
2367 && TREE_CODE (oprnd1) == SSA_NAME)
2368 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2370 def = NULL_TREE;
2371 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2372 if (TREE_CODE (oprnd1) == INTEGER_CST
2373 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2374 def = oprnd1;
2375 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2377 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2378 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2379 && TYPE_PRECISION (TREE_TYPE (rhs1))
2380 == TYPE_PRECISION (type))
2381 def = rhs1;
2384 if (def == NULL_TREE)
2386 def = vect_recog_temp_ssa_var (type, NULL);
2387 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2388 if (ext_def)
2390 basic_block new_bb
2391 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2392 gcc_assert (!new_bb);
2394 else
2395 append_pattern_def_seq (stmt_vinfo, def_stmt);
2397 stype = TREE_TYPE (def);
2398 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2400 if (TREE_CODE (def) == INTEGER_CST)
2402 if (!tree_fits_uhwi_p (def)
2403 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2404 || integer_zerop (def))
2405 return NULL;
2406 def2 = build_int_cst (stype,
2407 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2409 else
2411 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2413 if (vecstype == NULL_TREE)
2414 return NULL;
2415 def2 = vect_recog_temp_ssa_var (stype, NULL);
2416 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2417 if (ext_def)
2419 basic_block new_bb
2420 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2421 gcc_assert (!new_bb);
2423 else
2424 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2426 def2 = vect_recog_temp_ssa_var (stype, NULL);
2427 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2428 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2429 gimple_assign_lhs (def_stmt), mask);
2430 if (ext_def)
2432 basic_block new_bb
2433 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2434 gcc_assert (!new_bb);
2436 else
2437 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2440 var1 = vect_recog_temp_ssa_var (type, NULL);
2441 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2442 ? LSHIFT_EXPR : RSHIFT_EXPR,
2443 oprnd0, def);
2444 append_pattern_def_seq (stmt_vinfo, def_stmt);
2446 var2 = vect_recog_temp_ssa_var (type, NULL);
2447 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2448 ? RSHIFT_EXPR : LSHIFT_EXPR,
2449 oprnd0, def2);
2450 append_pattern_def_seq (stmt_vinfo, def_stmt);
2452 /* Pattern detected. */
2453 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2455 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2456 var = vect_recog_temp_ssa_var (type, NULL);
2457 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2459 return pattern_stmt;
2462 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2463 vectorized:
2465 type a_t;
2466 TYPE b_T, res_T;
2468 S1 a_t = ;
2469 S2 b_T = ;
2470 S3 res_T = b_T op a_t;
2472 where 'TYPE' is a type with a different size from 'type',
2473 and op is <<, >> or rotate.
2475 Also detect cases:
2477 type a_t;
2478 TYPE b_T, c_T, res_T;
2480 S0 c_T = ;
2481 S1 a_t = (type) c_T;
2482 S2 b_T = ;
2483 S3 res_T = b_T op a_t;
2485 Input/Output:
2487 * STMT_VINFO: The stmt from which the pattern search begins,
2488 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2489 with a shift/rotate which has same type on both operands, in the
2490 second case just b_T op c_T, in the first case with added cast
2491 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2493 Output:
2495 * TYPE_OUT: The type of the output of this pattern.
2497 * Return value: A new stmt that will be used to replace the shift/rotate
2498 S3 stmt. */
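/* For instance (names illustrative, assuming 32-bit int and 64-bit
   long long), the shift amount below has a narrower type than the
   shifted value, which matches the first form above:

     void
     f (long long *a, const int *shift, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = a[i] << shift[i];
     }

   The pattern adds a cast of the amount to the value's type so that the
   vectorized shift has matching operand types.  */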
2500 static gimple *
2501 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2502 tree *type_out)
2504 gimple *last_stmt = stmt_vinfo->stmt;
2505 tree oprnd0, oprnd1, lhs, var;
2506 gimple *pattern_stmt;
2507 enum tree_code rhs_code;
2508 vec_info *vinfo = stmt_vinfo->vinfo;
2510 if (!is_gimple_assign (last_stmt))
2511 return NULL;
2513 rhs_code = gimple_assign_rhs_code (last_stmt);
2514 switch (rhs_code)
2516 case LSHIFT_EXPR:
2517 case RSHIFT_EXPR:
2518 case LROTATE_EXPR:
2519 case RROTATE_EXPR:
2520 break;
2521 default:
2522 return NULL;
2525 lhs = gimple_assign_lhs (last_stmt);
2526 oprnd0 = gimple_assign_rhs1 (last_stmt);
2527 oprnd1 = gimple_assign_rhs2 (last_stmt);
2528 if (TREE_CODE (oprnd0) != SSA_NAME
2529 || TREE_CODE (oprnd1) != SSA_NAME
2530 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2531 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2532 || TYPE_PRECISION (TREE_TYPE (lhs))
2533 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2534 return NULL;
2536 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2537 if (!def_vinfo)
2538 return NULL;
2540 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2541 if (*type_out == NULL_TREE)
2542 return NULL;
2544 tree def = NULL_TREE;
2545 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2546 if (def_stmt && gimple_assign_cast_p (def_stmt))
2548 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2549 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2550 && TYPE_PRECISION (TREE_TYPE (rhs1))
2551 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2553 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2554 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2555 def = rhs1;
2556 else
2558 tree mask
2559 = build_low_bits_mask (TREE_TYPE (rhs1),
2560 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2561 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2562 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2563 tree vecstype = get_vectype_for_scalar_type (vinfo,
2564 TREE_TYPE (rhs1));
2565 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2570 if (def == NULL_TREE)
2572 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2573 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2574 append_pattern_def_seq (stmt_vinfo, def_stmt);
2577 /* Pattern detected. */
2578 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2580 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2581 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2582 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2584 return pattern_stmt;
2587 /* Return true iff the target has a vector optab implementing the operation
2588 CODE on type VECTYPE. */
2590 static bool
2591 target_has_vecop_for_code (tree_code code, tree vectype)
2593 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2594 return voptab
2595 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2598 /* Verify that the target has optabs of VECTYPE to perform all the steps
2599 needed by the multiplication-by-immediate synthesis algorithm described by
2600 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2601 present. Return true iff the target supports all the steps. */
2603 static bool
2604 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2605 tree vectype, bool synth_shift_p)
2607 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2608 return false;
2610 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2611 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2613 if (var == negate_variant
2614 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2615 return false;
2617 /* If we must synthesize shifts with additions make sure that vector
2618 addition is available. */
2619 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2620 return false;
2622 for (int i = 1; i < alg->ops; i++)
2624 switch (alg->op[i])
2626 case alg_shift:
2627 break;
2628 case alg_add_t_m2:
2629 case alg_add_t2_m:
2630 case alg_add_factor:
2631 if (!supports_vplus)
2632 return false;
2633 break;
2634 case alg_sub_t_m2:
2635 case alg_sub_t2_m:
2636 case alg_sub_factor:
2637 if (!supports_vminus)
2638 return false;
2639 break;
2640 case alg_unknown:
2641 case alg_m:
2642 case alg_zero:
2643 case alg_impossible:
2644 return false;
2645 default:
2646 gcc_unreachable ();
2650 return true;
2653 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2654 putting the final result in DEST. Append all statements but the last into
2655 VINFO. Return the last statement. */
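/* For example, for AMNT == 3 this produces (temporary names illustrative):

     t1 = op + op;      // op << 1
     t2 = t1 + t1;      // op << 2
     dest = t2 + t2;    // op << 3

   i.e. one self-addition (doubling) per bit of the shift amount, with only
   the final statement returned rather than appended.  */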
2657 static gimple *
2658 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2659 stmt_vec_info vinfo)
2661 HOST_WIDE_INT i;
2662 tree itype = TREE_TYPE (op);
2663 tree prev_res = op;
2664 gcc_assert (amnt >= 0);
2665 for (i = 0; i < amnt; i++)
2667 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2668 : dest;
2669 gimple *stmt
2670 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2671 prev_res = tmp_var;
2672 if (i < amnt - 1)
2673 append_pattern_def_seq (vinfo, stmt);
2674 else
2675 return stmt;
2677 gcc_unreachable ();
2678 return NULL;
2681 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2682 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2683 the process if necessary. Append the resulting assignment statements
2684 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2685 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2686 left shifts using additions. */
2688 static tree
2689 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2690 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2692 if (integer_zerop (op2)
2693 && (code == LSHIFT_EXPR
2694 || code == PLUS_EXPR))
2696 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2697 return op1;
2700 gimple *stmt;
2701 tree itype = TREE_TYPE (op1);
2702 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2704 if (code == LSHIFT_EXPR
2705 && synth_shift_p)
2707 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2708 stmt_vinfo);
2709 append_pattern_def_seq (stmt_vinfo, stmt);
2710 return tmp_var;
2713 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2714 append_pattern_def_seq (stmt_vinfo, stmt);
2715 return tmp_var;
2718 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2719 and simple arithmetic operations to be vectorized. Record the statements
2720 produced in STMT_VINFO and return the last statement in the sequence or
2721 NULL if it's not possible to synthesize such a multiplication.
2722 This function mirrors the behavior of expand_mult_const in expmed.c but
2723 works on tree-ssa form. */
2725 static gimple *
2726 vect_synth_mult_by_constant (tree op, tree val,
2727 stmt_vec_info stmt_vinfo)
2729 vec_info *vinfo = stmt_vinfo->vinfo;
2730 tree itype = TREE_TYPE (op);
2731 machine_mode mode = TYPE_MODE (itype);
2732 struct algorithm alg;
2733 mult_variant variant;
2734 if (!tree_fits_shwi_p (val))
2735 return NULL;
2737 /* Multiplication synthesis by shifts, adds and subs can introduce
2738 signed overflow where the original operation didn't. Perform the
2739 operations on an unsigned type and cast back to avoid this.
2740 In the future we may want to relax this for synthesis algorithms
2741 that we can prove do not cause unexpected overflow. */
2742 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2744 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2746 /* Targets that don't support vector shifts but support vector additions
2747 can synthesize shifts that way. */
2748 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2750 HOST_WIDE_INT hwval = tree_to_shwi (val);
2751 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2752 The vectorizer's benefit analysis will decide whether it's beneficial
2753 to do this. */
2754 bool possible = choose_mult_variant (mode, hwval, &alg,
2755 &variant, MAX_COST);
2756 if (!possible)
2757 return NULL;
2759 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2761 if (!vectype
2762 || !target_supports_mult_synth_alg (&alg, variant,
2763 vectype, synth_shift_p))
2764 return NULL;
2766 tree accumulator;
2768 /* Clear out the sequence of statements so we can populate it below. */
2769 gimple *stmt = NULL;
2771 if (cast_to_unsigned_p)
2773 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2774 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2775 append_pattern_def_seq (stmt_vinfo, stmt);
2776 op = tmp_op;
2779 if (alg.op[0] == alg_zero)
2780 accumulator = build_int_cst (multtype, 0);
2781 else
2782 accumulator = op;
2784 bool needs_fixup = (variant == negate_variant)
2785 || (variant == add_variant);
2787 for (int i = 1; i < alg.ops; i++)
2789 tree shft_log = build_int_cst (multtype, alg.log[i]);
2790 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2791 tree tmp_var = NULL_TREE;
2793 switch (alg.op[i])
2795 case alg_shift:
2796 if (synth_shift_p)
2797 stmt
2798 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2799 stmt_vinfo);
2800 else
2801 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2802 shft_log);
2803 break;
2804 case alg_add_t_m2:
2805 tmp_var
2806 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2807 stmt_vinfo, synth_shift_p);
2808 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2809 tmp_var);
2810 break;
2811 case alg_sub_t_m2:
2812 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2813 shft_log, stmt_vinfo,
2814 synth_shift_p);
2815 /* In some algorithms the first step involves zeroing the
2816 accumulator. If subtracting from such an accumulator
2817 just emit the negation directly. */
2818 if (integer_zerop (accumulator))
2819 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2820 else
2821 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2822 tmp_var);
2823 break;
2824 case alg_add_t2_m:
2825 tmp_var
2826 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2827 stmt_vinfo, synth_shift_p);
2828 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2829 break;
2830 case alg_sub_t2_m:
2831 tmp_var
2832 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2833 stmt_vinfo, synth_shift_p);
2834 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2835 break;
2836 case alg_add_factor:
2837 tmp_var
2838 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2839 stmt_vinfo, synth_shift_p);
2840 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2841 tmp_var);
2842 break;
2843 case alg_sub_factor:
2844 tmp_var
2845 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2846 stmt_vinfo, synth_shift_p);
2847 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2848 accumulator);
2849 break;
2850 default:
2851 gcc_unreachable ();
2853 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2854 but rather return it directly. */
2856 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2857 append_pattern_def_seq (stmt_vinfo, stmt);
2858 accumulator = accum_tmp;
2860 if (variant == negate_variant)
2862 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2863 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2864 accumulator = accum_tmp;
2865 if (cast_to_unsigned_p)
2866 append_pattern_def_seq (stmt_vinfo, stmt);
2868 else if (variant == add_variant)
2870 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2871 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2872 accumulator = accum_tmp;
2873 if (cast_to_unsigned_p)
2874 append_pattern_def_seq (stmt_vinfo, stmt);
2876 /* Move back to a signed if needed. */
2877 if (cast_to_unsigned_p)
2879 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2880 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2883 return stmt;
2886 /* Detect multiplication by constant and convert it into a sequence of
2887 shifts, additions, subtractions and negations. We reuse the
2888 choose_mult_variant algorithms from expmed.c
2890 Input/Output:
2892 STMT_VINFO: The stmt from which the pattern search begins,
2893 i.e. the mult stmt.
2895 Output:
2897 * TYPE_OUT: The type of the output of this pattern.
2899 * Return value: A new stmt that will be used to replace
2900 the multiplication. */
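/* For instance, on a target without a vector multiply, a statement such as
   (illustrative only)

     a[i] = b[i] * 5;

   can be replaced, if choose_mult_variant finds a sequence, by

     t = b[i] << 2;
     a[i] = t + b[i];

   with the shift itself synthesized from additions on targets that also
   lack vector shifts.  */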
2902 static gimple *
2903 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2905 vec_info *vinfo = stmt_vinfo->vinfo;
2906 gimple *last_stmt = stmt_vinfo->stmt;
2907 tree oprnd0, oprnd1, vectype, itype;
2908 gimple *pattern_stmt;
2910 if (!is_gimple_assign (last_stmt))
2911 return NULL;
2913 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2914 return NULL;
2916 oprnd0 = gimple_assign_rhs1 (last_stmt);
2917 oprnd1 = gimple_assign_rhs2 (last_stmt);
2918 itype = TREE_TYPE (oprnd0);
2920 if (TREE_CODE (oprnd0) != SSA_NAME
2921 || TREE_CODE (oprnd1) != INTEGER_CST
2922 || !INTEGRAL_TYPE_P (itype)
2923 || !type_has_mode_precision_p (itype))
2924 return NULL;
2926 vectype = get_vectype_for_scalar_type (vinfo, itype);
2927 if (vectype == NULL_TREE)
2928 return NULL;
2930 /* If the target can handle vectorized multiplication natively,
2931 don't attempt to optimize this. */
2932 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2933 if (mul_optab != unknown_optab)
2935 machine_mode vec_mode = TYPE_MODE (vectype);
2936 int icode = (int) optab_handler (mul_optab, vec_mode);
2937 if (icode != CODE_FOR_nothing)
2938 return NULL;
2941 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2942 if (!pattern_stmt)
2943 return NULL;
2945 /* Pattern detected. */
2946 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2948 *type_out = vectype;
2950 return pattern_stmt;
2953 /* Detect a signed division by a constant that wouldn't be
2954 otherwise vectorized:
2956 type a_t, b_t;
2958 S1 a_t = b_t / N;
2960 where type 'type' is an integral type and N is a constant.
2962 Similarly handle modulo by a constant:
2964 S4 a_t = b_t % N;
2966 Input/Output:
2968 * STMT_VINFO: The stmt from which the pattern search begins,
2969 i.e. the division stmt. If N is a power of two constant and
2970 the type is signed, S1 is replaced by:
2971 S3 y_t = b_t < 0 ? N - 1 : 0;
2972 S2 x_t = b_t + y_t;
2973 S1' a_t = x_t >> log2 (N);
2975 If N is a power of two constant and the type is signed, S4 is
2976 replaced by (where *_T temporaries have unsigned type):
2977 S9 y_T = b_t < 0 ? -1U : 0U;
2978 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2979 S7 z_t = (type) z_T;
2980 S6 w_t = b_t + z_t;
2981 S5 x_t = w_t & (N - 1);
2982 S4' a_t = x_t - z_t;
2984 Output:
2986 * TYPE_OUT: The type of the output of this pattern.
2988 * Return value: A new stmt that will be used to replace the division
2989 S1 or modulo S4 stmt. */
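/* As a worked instance of the power-of-two case, with N == 4 and a signed
   32-bit b_t, S1 becomes

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   which rounds towards zero as TRUNC_DIV_EXPR requires.  */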
2991 static gimple *
2992 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2994 vec_info *vinfo = stmt_vinfo->vinfo;
2995 gimple *last_stmt = stmt_vinfo->stmt;
2996 tree oprnd0, oprnd1, vectype, itype, cond;
2997 gimple *pattern_stmt, *def_stmt;
2998 enum tree_code rhs_code;
2999 optab optab;
3000 tree q;
3001 int dummy_int, prec;
3003 if (!is_gimple_assign (last_stmt))
3004 return NULL;
3006 rhs_code = gimple_assign_rhs_code (last_stmt);
3007 switch (rhs_code)
3009 case TRUNC_DIV_EXPR:
3010 case EXACT_DIV_EXPR:
3011 case TRUNC_MOD_EXPR:
3012 break;
3013 default:
3014 return NULL;
3017 oprnd0 = gimple_assign_rhs1 (last_stmt);
3018 oprnd1 = gimple_assign_rhs2 (last_stmt);
3019 itype = TREE_TYPE (oprnd0);
3020 if (TREE_CODE (oprnd0) != SSA_NAME
3021 || TREE_CODE (oprnd1) != INTEGER_CST
3022 || TREE_CODE (itype) != INTEGER_TYPE
3023 || !type_has_mode_precision_p (itype))
3024 return NULL;
3026 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3027 vectype = get_vectype_for_scalar_type (vinfo, itype);
3028 if (vectype == NULL_TREE)
3029 return NULL;
3031 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3033 /* If the target can handle vectorized division or modulo natively,
3034 don't attempt to optimize this, since native division is likely
3035 to give smaller code. */
3036 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3037 if (optab != unknown_optab)
3039 machine_mode vec_mode = TYPE_MODE (vectype);
3040 int icode = (int) optab_handler (optab, vec_mode);
3041 if (icode != CODE_FOR_nothing)
3042 return NULL;
3046 prec = TYPE_PRECISION (itype);
3047 if (integer_pow2p (oprnd1))
3049 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3050 return NULL;
3052 /* Pattern detected. */
3053 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3055 *type_out = vectype;
3057 /* Check if the target supports this internal function. */
3058 internal_fn ifn = IFN_DIV_POW2;
3059 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3061 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3063 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3064 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3065 gimple_call_set_lhs (div_stmt, var_div);
3067 if (rhs_code == TRUNC_MOD_EXPR)
3069 append_pattern_def_seq (stmt_vinfo, div_stmt);
3070 def_stmt
3071 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3072 LSHIFT_EXPR, var_div, shift);
3073 append_pattern_def_seq (stmt_vinfo, def_stmt);
3074 pattern_stmt
3075 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3076 MINUS_EXPR, oprnd0,
3077 gimple_assign_lhs (def_stmt));
3079 else
3080 pattern_stmt = div_stmt;
3081 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3083 return pattern_stmt;
3086 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3087 build_int_cst (itype, 0));
3088 if (rhs_code == TRUNC_DIV_EXPR
3089 || rhs_code == EXACT_DIV_EXPR)
3091 tree var = vect_recog_temp_ssa_var (itype, NULL);
3092 tree shift;
3093 def_stmt
3094 = gimple_build_assign (var, COND_EXPR, cond,
3095 fold_build2 (MINUS_EXPR, itype, oprnd1,
3096 build_int_cst (itype, 1)),
3097 build_int_cst (itype, 0));
3098 append_pattern_def_seq (stmt_vinfo, def_stmt);
3099 var = vect_recog_temp_ssa_var (itype, NULL);
3100 def_stmt
3101 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3102 gimple_assign_lhs (def_stmt));
3103 append_pattern_def_seq (stmt_vinfo, def_stmt);
3105 shift = build_int_cst (itype, tree_log2 (oprnd1));
3106 pattern_stmt
3107 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3108 RSHIFT_EXPR, var, shift);
3110 else
3112 tree signmask;
3113 if (compare_tree_int (oprnd1, 2) == 0)
3115 signmask = vect_recog_temp_ssa_var (itype, NULL);
3116 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3117 build_int_cst (itype, 1),
3118 build_int_cst (itype, 0));
3119 append_pattern_def_seq (stmt_vinfo, def_stmt);
3121 else
3123 tree utype
3124 = build_nonstandard_integer_type (prec, 1);
3125 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3126 tree shift
3127 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3128 - tree_log2 (oprnd1));
3129 tree var = vect_recog_temp_ssa_var (utype, NULL);
3131 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3132 build_int_cst (utype, -1),
3133 build_int_cst (utype, 0));
3134 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3135 var = vect_recog_temp_ssa_var (utype, NULL);
3136 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3137 gimple_assign_lhs (def_stmt),
3138 shift);
3139 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3140 signmask = vect_recog_temp_ssa_var (itype, NULL);
3141 def_stmt
3142 = gimple_build_assign (signmask, NOP_EXPR, var);
3143 append_pattern_def_seq (stmt_vinfo, def_stmt);
3145 def_stmt
3146 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3147 PLUS_EXPR, oprnd0, signmask);
3148 append_pattern_def_seq (stmt_vinfo, def_stmt);
3149 def_stmt
3150 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3151 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3152 fold_build2 (MINUS_EXPR, itype, oprnd1,
3153 build_int_cst (itype, 1)));
3154 append_pattern_def_seq (stmt_vinfo, def_stmt);
3156 pattern_stmt
3157 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3158 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3159 signmask);
3162 return pattern_stmt;
3165 if (prec > HOST_BITS_PER_WIDE_INT
3166 || integer_zerop (oprnd1))
3167 return NULL;
3169 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3170 return NULL;
3172 if (TYPE_UNSIGNED (itype))
3174 unsigned HOST_WIDE_INT mh, ml;
3175 int pre_shift, post_shift;
3176 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3177 & GET_MODE_MASK (itype_mode));
3178 tree t1, t2, t3, t4;
3180 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3181 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3182 return NULL;
3184 /* Find a suitable multiplier and right shift count
3185 instead of multiplying with D. */
3186 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3188 /* If the suggested multiplier is more than SIZE bits, we can do better
3189 for even divisors, using an initial right shift. */
3190 if (mh != 0 && (d & 1) == 0)
3192 pre_shift = ctz_or_zero (d);
3193 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3194 &ml, &post_shift, &dummy_int);
3195 gcc_assert (!mh);
3197 else
3198 pre_shift = 0;
3200 if (mh != 0)
3202 if (post_shift - 1 >= prec)
3203 return NULL;
3205 /* t1 = oprnd0 h* ml;
3206 t2 = oprnd0 - t1;
3207 t3 = t2 >> 1;
3208 t4 = t1 + t3;
3209 q = t4 >> (post_shift - 1); */
3210 t1 = vect_recog_temp_ssa_var (itype, NULL);
3211 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3212 build_int_cst (itype, ml));
3213 append_pattern_def_seq (stmt_vinfo, def_stmt);
3215 t2 = vect_recog_temp_ssa_var (itype, NULL);
3216 def_stmt
3217 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3218 append_pattern_def_seq (stmt_vinfo, def_stmt);
3220 t3 = vect_recog_temp_ssa_var (itype, NULL);
3221 def_stmt
3222 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3223 append_pattern_def_seq (stmt_vinfo, def_stmt);
3225 t4 = vect_recog_temp_ssa_var (itype, NULL);
3226 def_stmt
3227 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3229 if (post_shift != 1)
3231 append_pattern_def_seq (stmt_vinfo, def_stmt);
3233 q = vect_recog_temp_ssa_var (itype, NULL);
3234 pattern_stmt
3235 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3236 build_int_cst (itype, post_shift - 1));
3238 else
3240 q = t4;
3241 pattern_stmt = def_stmt;
3244 else
3246 if (pre_shift >= prec || post_shift >= prec)
3247 return NULL;
3249 /* t1 = oprnd0 >> pre_shift;
3250 t2 = t1 h* ml;
3251 q = t2 >> post_shift; */
3252 if (pre_shift)
3254 t1 = vect_recog_temp_ssa_var (itype, NULL);
3255 def_stmt
3256 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3257 build_int_cst (NULL, pre_shift));
3258 append_pattern_def_seq (stmt_vinfo, def_stmt);
3260 else
3261 t1 = oprnd0;
3263 t2 = vect_recog_temp_ssa_var (itype, NULL);
3264 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3265 build_int_cst (itype, ml));
3267 if (post_shift)
3269 append_pattern_def_seq (stmt_vinfo, def_stmt);
3271 q = vect_recog_temp_ssa_var (itype, NULL);
3272 def_stmt
3273 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3274 build_int_cst (itype, post_shift));
3276 else
3277 q = t2;
3279 pattern_stmt = def_stmt;
3282 else
3284 unsigned HOST_WIDE_INT ml;
3285 int post_shift;
3286 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3287 unsigned HOST_WIDE_INT abs_d;
3288 bool add = false;
3289 tree t1, t2, t3, t4;
3291 /* Give up for -1. */
3292 if (d == -1)
3293 return NULL;
3295 /* Since d might be INT_MIN, we have to cast to
3296 unsigned HOST_WIDE_INT before negating to avoid
3297 undefined signed overflow. */
3298 abs_d = (d >= 0
3299 ? (unsigned HOST_WIDE_INT) d
3300 : - (unsigned HOST_WIDE_INT) d);
3302 /* n rem d = n rem -d */
3303 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3305 d = abs_d;
3306 oprnd1 = build_int_cst (itype, abs_d);
3308 else if (HOST_BITS_PER_WIDE_INT >= prec
3309 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3310 /* This case is not handled correctly below. */
3311 return NULL;
3313 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3314 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3316 add = true;
3317 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3319 if (post_shift >= prec)
3320 return NULL;
3322 /* t1 = oprnd0 h* ml; */
3323 t1 = vect_recog_temp_ssa_var (itype, NULL);
3324 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3325 build_int_cst (itype, ml));
3327 if (add)
3329 /* t2 = t1 + oprnd0; */
3330 append_pattern_def_seq (stmt_vinfo, def_stmt);
3331 t2 = vect_recog_temp_ssa_var (itype, NULL);
3332 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3334 else
3335 t2 = t1;
3337 if (post_shift)
3339 /* t3 = t2 >> post_shift; */
3340 append_pattern_def_seq (stmt_vinfo, def_stmt);
3341 t3 = vect_recog_temp_ssa_var (itype, NULL);
3342 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3343 build_int_cst (itype, post_shift));
3345 else
3346 t3 = t2;
3348 wide_int oprnd0_min, oprnd0_max;
3349 int msb = 1;
3350 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3352 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3353 msb = 0;
3354 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3355 msb = -1;
3358 if (msb == 0 && d >= 0)
3360 /* q = t3; */
3361 q = t3;
3362 pattern_stmt = def_stmt;
3364 else
3366 /* t4 = oprnd0 >> (prec - 1);
3367 or if we know from VRP that oprnd0 >= 0
3368 t4 = 0;
3369 or if we know from VRP that oprnd0 < 0
3370 t4 = -1; */
3371 append_pattern_def_seq (stmt_vinfo, def_stmt);
3372 t4 = vect_recog_temp_ssa_var (itype, NULL);
3373 if (msb != 1)
3374 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3375 build_int_cst (itype, msb));
3376 else
3377 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3378 build_int_cst (itype, prec - 1));
3379 append_pattern_def_seq (stmt_vinfo, def_stmt);
3381 /* q = t3 - t4; or q = t4 - t3; */
3382 q = vect_recog_temp_ssa_var (itype, NULL);
3383 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3384 d < 0 ? t3 : t4);
3388 if (rhs_code == TRUNC_MOD_EXPR)
3390 tree r, t1;
3392 /* We divided. Now finish by:
3393 t1 = q * oprnd1;
3394 r = oprnd0 - t1; */
3395 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3397 t1 = vect_recog_temp_ssa_var (itype, NULL);
3398 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3399 append_pattern_def_seq (stmt_vinfo, def_stmt);
3401 r = vect_recog_temp_ssa_var (itype, NULL);
3402 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3405 /* Pattern detected. */
3406 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3408 *type_out = vectype;
3409 return pattern_stmt;
3412 /* Function vect_recog_mixed_size_cond_pattern
3414 Try to find the following pattern:
3416 type x_t, y_t;
3417 TYPE a_T, b_T, c_T;
3418 loop:
3419 S1 a_T = x_t CMP y_t ? b_T : c_T;
3421 where type 'TYPE' is an integral type which has different size
3422 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3423 than 'type', the constants need to fit into an integer type
3424 with the same width as 'type') or results of conversion from 'type'.
3426 Input:
3428 * STMT_VINFO: The stmt from which the pattern search begins.
3430 Output:
3432 * TYPE_OUT: The type of the output of this pattern.
3434 * Return value: A new stmt that will be used to replace the pattern.
3435 Additionally a def_stmt is added.
3437 a_it = x_t CMP y_t ? b_it : c_it;
3438 a_T = (TYPE) a_it; */
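/* A typical instance (illustrative, assuming 8-bit char and 32-bit int):

     unsigned char x, y;
     int a = x < y ? 100 : -1;

   Both constants fit in an 8-bit integer type, so the COND_EXPR is done
   in that type and the result widened:

     a_it = x < y ? 100 : -1;
     a = (int) a_it;  */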
3440 static gimple *
3441 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3443 vec_info *vinfo = stmt_vinfo->vinfo;
3444 gimple *last_stmt = stmt_vinfo->stmt;
3445 tree cond_expr, then_clause, else_clause;
3446 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3447 gimple *pattern_stmt, *def_stmt;
3448 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3449 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3450 bool promotion;
3451 tree comp_scalar_type;
3453 if (!is_gimple_assign (last_stmt)
3454 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3455 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3456 return NULL;
3458 cond_expr = gimple_assign_rhs1 (last_stmt);
3459 then_clause = gimple_assign_rhs2 (last_stmt);
3460 else_clause = gimple_assign_rhs3 (last_stmt);
3462 if (!COMPARISON_CLASS_P (cond_expr))
3463 return NULL;
3465 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3466 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3467 if (comp_vectype == NULL_TREE)
3468 return NULL;
3470 type = gimple_expr_type (last_stmt);
3471 if (types_compatible_p (type, comp_scalar_type)
3472 || ((TREE_CODE (then_clause) != INTEGER_CST
3473 || TREE_CODE (else_clause) != INTEGER_CST)
3474 && !INTEGRAL_TYPE_P (comp_scalar_type))
3475 || !INTEGRAL_TYPE_P (type))
3476 return NULL;
3478 if ((TREE_CODE (then_clause) != INTEGER_CST
3479 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3480 &def_stmt0, &promotion))
3481 || (TREE_CODE (else_clause) != INTEGER_CST
3482 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3483 &def_stmt1, &promotion)))
3484 return NULL;
3486 if (orig_type0 && orig_type1
3487 && !types_compatible_p (orig_type0, orig_type1))
3488 return NULL;
3490 if (orig_type0)
3492 if (!types_compatible_p (orig_type0, comp_scalar_type))
3493 return NULL;
3494 then_clause = gimple_assign_rhs1 (def_stmt0);
3495 itype = orig_type0;
3498 if (orig_type1)
3500 if (!types_compatible_p (orig_type1, comp_scalar_type))
3501 return NULL;
3502 else_clause = gimple_assign_rhs1 (def_stmt1);
3503 itype = orig_type1;
3507 HOST_WIDE_INT cmp_mode_size
3508 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3510 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3511 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3512 return NULL;
3514 vectype = get_vectype_for_scalar_type (vinfo, type);
3515 if (vectype == NULL_TREE)
3516 return NULL;
3518 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3519 return NULL;
3521 if (itype == NULL_TREE)
3522 itype = build_nonstandard_integer_type (cmp_mode_size,
3523 TYPE_UNSIGNED (type));
3525 if (itype == NULL_TREE
3526 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3527 return NULL;
3529 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3530 if (vecitype == NULL_TREE)
3531 return NULL;
3533 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3534 return NULL;
3536 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3538 if ((TREE_CODE (then_clause) == INTEGER_CST
3539 && !int_fits_type_p (then_clause, itype))
3540 || (TREE_CODE (else_clause) == INTEGER_CST
3541 && !int_fits_type_p (else_clause, itype)))
3542 return NULL;
3545 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3546 COND_EXPR, unshare_expr (cond_expr),
3547 fold_convert (itype, then_clause),
3548 fold_convert (itype, else_clause));
3549 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3550 NOP_EXPR, gimple_assign_lhs (def_stmt));
3552 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3553 *type_out = vectype;
3555 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3557 return pattern_stmt;
3561 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3562 true if bool VAR can and should be optimized that way. Assume it shouldn't
3563 in case it's a result of a comparison which can be directly vectorized into
3564 a vector comparison. Fills in STMTS with all stmts visited during the
3565 walk. */
3567 static bool
3568 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3570 tree rhs1;
3571 enum tree_code rhs_code;
3573 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3574 if (!def_stmt_info)
3575 return false;
3577 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3578 if (!def_stmt)
3579 return false;
3581 if (stmts.contains (def_stmt))
3582 return true;
3584 rhs1 = gimple_assign_rhs1 (def_stmt);
3585 rhs_code = gimple_assign_rhs_code (def_stmt);
3586 switch (rhs_code)
3588 case SSA_NAME:
3589 if (! check_bool_pattern (rhs1, vinfo, stmts))
3590 return false;
3591 break;
3593 CASE_CONVERT:
3594 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3595 return false;
3596 if (! check_bool_pattern (rhs1, vinfo, stmts))
3597 return false;
3598 break;
3600 case BIT_NOT_EXPR:
3601 if (! check_bool_pattern (rhs1, vinfo, stmts))
3602 return false;
3603 break;
3605 case BIT_AND_EXPR:
3606 case BIT_IOR_EXPR:
3607 case BIT_XOR_EXPR:
3608 if (! check_bool_pattern (rhs1, vinfo, stmts)
3609 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3610 return false;
3611 break;
3613 default:
3614 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3616 tree vecitype, comp_vectype;
3618 /* If the comparison can throw, then is_gimple_condexpr will be
3619 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3620 if (stmt_could_throw_p (cfun, def_stmt))
3621 return false;
3623 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3624 if (comp_vectype == NULL_TREE)
3625 return false;
3627 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3628 TREE_TYPE (rhs1));
3629 if (mask_type
3630 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3631 return false;
3633 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3635 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3636 tree itype
3637 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3638 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3639 if (vecitype == NULL_TREE)
3640 return false;
3642 else
3643 vecitype = comp_vectype;
3644 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3645 return false;
3647 else
3648 return false;
3649 break;
3652 bool res = stmts.add (def_stmt);
3653 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3654 gcc_assert (!res);
3656 return true;
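/* For example, given (hypothetical SSA names)

     _1 = a_5 > b_6;
     _2 = c_7 < d_8;
     _3 = _1 & _2;

   check_bool_pattern (_3, ...) returns false if the target can compare
   the operand vectors directly into a mask (the bool operations are then
   handled as mask operations), and true otherwise, collecting the three
   defining stmts in STMTS so that adjust_bool_stmts can rewrite them as
   COND_EXPRs producing integer 0/1 values.  */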
3660 /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
3661 previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast
3662 stmt to STMT_INFO's pattern sequence, and return the cast's result. */
3664 static tree
3665 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3667 vec_info *vinfo = stmt_info->vinfo;
3668 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3669 NOP_EXPR, var);
3670 append_pattern_def_seq (stmt_info, cast_stmt,
3671 get_vectype_for_scalar_type (vinfo, type));
3672 return gimple_assign_lhs (cast_stmt);
3675 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3676 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3677 type, OUT_TYPE is the desired final integer type of the whole pattern.
3678 STMT_INFO is the info of the pattern root, with which the pattern stmts
3679 should be associated. DEFS is a map of pattern defs. */
3681 static void
3682 adjust_bool_pattern (tree var, tree out_type,
3683 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3685 vec_info *vinfo = stmt_info->vinfo;
3686 gimple *stmt = SSA_NAME_DEF_STMT (var);
3687 enum tree_code rhs_code, def_rhs_code;
3688 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3689 location_t loc;
3690 gimple *pattern_stmt, *def_stmt;
3691 tree trueval = NULL_TREE;
3693 rhs1 = gimple_assign_rhs1 (stmt);
3694 rhs2 = gimple_assign_rhs2 (stmt);
3695 rhs_code = gimple_assign_rhs_code (stmt);
3696 loc = gimple_location (stmt);
3697 switch (rhs_code)
3699 case SSA_NAME:
3700 CASE_CONVERT:
3701 irhs1 = *defs.get (rhs1);
3702 itype = TREE_TYPE (irhs1);
3703 pattern_stmt
3704 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3705 SSA_NAME, irhs1);
3706 break;
3708 case BIT_NOT_EXPR:
3709 irhs1 = *defs.get (rhs1);
3710 itype = TREE_TYPE (irhs1);
3711 pattern_stmt
3712 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3713 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3714 break;
3716 case BIT_AND_EXPR:
3717 /* Try to optimize x = y & (a < b ? 1 : 0); into
3718 x = (a < b ? y : 0);
3720 E.g. for:
3721 bool a_b, b_b, c_b;
3722 TYPE d_T;
3724 S1 a_b = x1 CMP1 y1;
3725 S2 b_b = x2 CMP2 y2;
3726 S3 c_b = a_b & b_b;
3727 S4 d_T = (TYPE) c_b;
3729 we would normally emit:
3731 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3732 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3733 S3' c_T = a_T & b_T;
3734 S4' d_T = c_T;
3736 but we can save one stmt by using the
3737 result of one of the COND_EXPRs in the other COND_EXPR and leave
3738 BIT_AND_EXPR stmt out:
3740 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3741 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3742 S4' d_T = c_T;
3744 At least when VEC_COND_EXPR is implemented using masks,
3745 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3746 computes the comparison mask and ANDs it, in one case with an
3747 all-ones vector, in the other case with a vector register.
3748 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3749 often more expensive. */
3750 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3751 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3752 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3754 irhs1 = *defs.get (rhs1);
3755 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3756 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3757 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3759 rhs_code = def_rhs_code;
3760 rhs1 = def_rhs1;
3761 rhs2 = gimple_assign_rhs2 (def_stmt);
3762 trueval = irhs1;
3763 goto do_compare;
3765 else
3766 irhs2 = *defs.get (rhs2);
3767 goto and_ior_xor;
3769 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3770 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3771 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3773 irhs2 = *defs.get (rhs2);
3774 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3775 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3776 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3778 rhs_code = def_rhs_code;
3779 rhs1 = def_rhs1;
3780 rhs2 = gimple_assign_rhs2 (def_stmt);
3781 trueval = irhs2;
3782 goto do_compare;
3784 else
3785 irhs1 = *defs.get (rhs1);
3786 goto and_ior_xor;
3788 /* FALLTHRU */
3789 case BIT_IOR_EXPR:
3790 case BIT_XOR_EXPR:
3791 irhs1 = *defs.get (rhs1);
3792 irhs2 = *defs.get (rhs2);
3793 and_ior_xor:
3794 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3795 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3797 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3798 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3799 int out_prec = TYPE_PRECISION (out_type);
3800 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3801 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3802 stmt_info);
3803 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3804 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3805 stmt_info);
3806 else
3808 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3809 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3812 itype = TREE_TYPE (irhs1);
3813 pattern_stmt
3814 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3815 rhs_code, irhs1, irhs2);
3816 break;
3818 default:
3819 do_compare:
3820 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3821 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3822 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3823 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3824 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3826 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3827 itype
3828 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3830 else
3831 itype = TREE_TYPE (rhs1);
3832 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3833 if (trueval == NULL_TREE)
3834 trueval = build_int_cst (itype, 1);
3835 else
3836 gcc_checking_assert (useless_type_conversion_p (itype,
3837 TREE_TYPE (trueval)));
3838 pattern_stmt
3839 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3840 COND_EXPR, cond_expr, trueval,
3841 build_int_cst (itype, 0));
3842 break;
3845 gimple_set_location (pattern_stmt, loc);
3846 append_pattern_def_seq (stmt_info, pattern_stmt,
3847 get_vectype_for_scalar_type (vinfo, itype));
3848 defs.put (var, gimple_assign_lhs (pattern_stmt));
3851 /* Comparison function to qsort a vector of gimple stmts by UID. */
3853 static int
3854 sort_after_uid (const void *p1, const void *p2)
3856 const gimple *stmt1 = *(const gimple * const *)p1;
3857 const gimple *stmt2 = *(const gimple * const *)p2;
3858 return gimple_uid (stmt1) - gimple_uid (stmt2);
3861 /* Create pattern stmts for all stmts participating in the bool pattern
3862 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3863 OUT_TYPE. Return the def of the pattern root. */
3865 static tree
3866 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3867 tree out_type, stmt_vec_info stmt_info)
3869 /* Gather original stmts in the bool pattern in their order of appearance
3870 in the IL. */
3871 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3872 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3873 i != bool_stmt_set.end (); ++i)
3874 bool_stmts.quick_push (*i);
3875 bool_stmts.qsort (sort_after_uid);
3877 /* Now process them in that order, producing pattern stmts. */
3878 hash_map <tree, tree> defs;
3879 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3880 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3881 out_type, stmt_info, defs);
3883 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3884 gimple *pattern_stmt
3885 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3886 return gimple_assign_lhs (pattern_stmt);
3889 /* Helper for search_type_for_mask. */
3891 static tree
3892 search_type_for_mask_1 (tree var, vec_info *vinfo,
3893 hash_map<gimple *, tree> &cache)
3895 tree rhs1;
3896 enum tree_code rhs_code;
3897 tree res = NULL_TREE, res2;
3899 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3900 return NULL_TREE;
3902 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3903 if (!def_stmt_info)
3904 return NULL_TREE;
3906 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3907 if (!def_stmt)
3908 return NULL_TREE;
3910 tree *c = cache.get (def_stmt);
3911 if (c)
3912 return *c;
3914 rhs_code = gimple_assign_rhs_code (def_stmt);
3915 rhs1 = gimple_assign_rhs1 (def_stmt);
3917 switch (rhs_code)
3919 case SSA_NAME:
3920 case BIT_NOT_EXPR:
3921 CASE_CONVERT:
3922 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3923 break;
3925 case BIT_AND_EXPR:
3926 case BIT_IOR_EXPR:
3927 case BIT_XOR_EXPR:
3928 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3929 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3930 cache);
3931 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3932 res = res2;
3933 break;
3935 default:
3936 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3938 tree comp_vectype, mask_type;
3940 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3942 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3943 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3944 vinfo, cache);
3945 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3946 res = res2;
3947 break;
3950 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3951 if (comp_vectype == NULL_TREE)
3953 res = NULL_TREE;
3954 break;
3957 mask_type = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3958 if (!mask_type
3959 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3961 res = NULL_TREE;
3962 break;
3965 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3966 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3968 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3969 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3971 else
3972 res = TREE_TYPE (rhs1);
3976 cache.put (def_stmt, res);
3977 return res;
3980 /* Return the proper type for converting bool VAR into
3981 an integer value or NULL_TREE if no such type exists.
3982 The type is chosen so that converted value has the
3983 same number of elements as VAR's vector type. */
3985 static tree
3986 search_type_for_mask (tree var, vec_info *vinfo)
3988 hash_map<gimple *, tree> cache;
3989 return search_type_for_mask_1 (var, vinfo, cache);
3992 /* Function vect_recog_bool_pattern
3994 Try to find pattern like following:
3996 bool a_b, b_b, c_b, d_b, e_b;
3997 TYPE f_T;
3998 loop:
3999 S1 a_b = x1 CMP1 y1;
4000 S2 b_b = x2 CMP2 y2;
4001 S3 c_b = a_b & b_b;
4002 S4 d_b = x3 CMP3 y3;
4003 S5 e_b = c_b | d_b;
4004 S6 f_T = (TYPE) e_b;
4006 where type 'TYPE' is an integral type. Or a similar pattern
4007 ending in
4009 S6 f_Y = e_b ? r_Y : s_Y;
4011 as results from if-conversion of a complex condition.
4013 Input:
4015 * STMT_VINFO: The stmt at the end from which the pattern
4016 search begins, i.e. cast of a bool to
4017 an integer type.
4019 Output:
4021 * TYPE_OUT: The type of the output of this pattern.
4023 * Return value: A new stmt that will be used to replace the pattern.
4025 Assuming the size of TYPE is the same as the size of all comparisons
4026 (otherwise some casts would be added where needed), for the above
4027 sequence we create related pattern stmts:
4028 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4029 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4030 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4031 S5' e_T = c_T | d_T;
4032 S6' f_T = e_T;
4034 Instead of the above S3' we could emit:
4035 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4036 S3' c_T = a_T | b_T;
4037 but the above is more efficient. */
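/* A (hypothetical) scalar source fragment that, after if-conversion,
   produces the GIMPLE shape above:

     void
     f (int *x1, int *y1, int *x2, int *y2, int *x3, int *y3,
        int *out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = (x1[i] > y1[i] && x2[i] > y2[i]) || x3[i] > y3[i];
     }

   The bool temporaries produced for the &&/|| combination are replaced
   by COND_EXPRs computing 0/1 values of the output's integer type.  */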
4039 static gimple *
4040 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4042 gimple *last_stmt = stmt_vinfo->stmt;
4043 enum tree_code rhs_code;
4044 tree var, lhs, rhs, vectype;
4045 vec_info *vinfo = stmt_vinfo->vinfo;
4046 gimple *pattern_stmt;
4048 if (!is_gimple_assign (last_stmt))
4049 return NULL;
4051 var = gimple_assign_rhs1 (last_stmt);
4052 lhs = gimple_assign_lhs (last_stmt);
4054 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4055 return NULL;
4057 hash_set<gimple *> bool_stmts;
4059 rhs_code = gimple_assign_rhs_code (last_stmt);
4060 if (CONVERT_EXPR_CODE_P (rhs_code))
4062 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4063 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
4064 return NULL;
4065 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4066 if (vectype == NULL_TREE)
4067 return NULL;
4069 if (check_bool_pattern (var, vinfo, bool_stmts))
4071 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
4072 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4073 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4074 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4075 else
4076 pattern_stmt
4077 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4079 else
4081 tree type = search_type_for_mask (var, vinfo);
4082 tree cst0, cst1, tmp;
4084 if (!type)
4085 return NULL;
4087 /* We may directly use the cond with the narrowed type, avoiding
4088 multiple cond exprs followed by result packing, and instead
4089 perform a single cond with a packed mask. In the widening
4090 case it is better to do the cond first and then extract the
4091 results. */
4092 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4093 type = TREE_TYPE (lhs);
4095 cst0 = build_int_cst (type, 0);
4096 cst1 = build_int_cst (type, 1);
4097 tmp = vect_recog_temp_ssa_var (type, NULL);
4098 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4100 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4102 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4103 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4105 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4106 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4110 *type_out = vectype;
4111 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4113 return pattern_stmt;
4115 else if (rhs_code == COND_EXPR
4116 && TREE_CODE (var) == SSA_NAME)
4118 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4119 if (vectype == NULL_TREE)
4120 return NULL;
4122 /* Build a scalar type for the boolean result that when
4123 vectorized matches the vector type of the result in
4124 size and number of elements. */
4125 unsigned prec
4126 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4127 TYPE_VECTOR_SUBPARTS (vectype));
4129 tree type
4130 = build_nonstandard_integer_type (prec,
4131 TYPE_UNSIGNED (TREE_TYPE (var)));
4132 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4133 return NULL;
4135 if (!check_bool_pattern (var, vinfo, bool_stmts))
4136 return NULL;
4138 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
4140 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4141 pattern_stmt
4142 = gimple_build_assign (lhs, COND_EXPR,
4143 build2 (NE_EXPR, boolean_type_node,
4144 rhs, build_int_cst (type, 0)),
4145 gimple_assign_rhs2 (last_stmt),
4146 gimple_assign_rhs3 (last_stmt));
4147 *type_out = vectype;
4148 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4150 return pattern_stmt;
4152 else if (rhs_code == SSA_NAME
4153 && STMT_VINFO_DATA_REF (stmt_vinfo))
4155 stmt_vec_info pattern_stmt_info;
4156 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
4157 gcc_assert (vectype != NULL_TREE);
4158 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
4159 return NULL;
4161 if (check_bool_pattern (var, vinfo, bool_stmts))
4162 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
4163 else
4165 tree type = search_type_for_mask (var, vinfo);
4166 tree cst0, cst1, new_vectype;
4168 if (!type)
4169 return NULL;
4171 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4172 type = TREE_TYPE (vectype);
4174 cst0 = build_int_cst (type, 0);
4175 cst1 = build_int_cst (type, 1);
4176 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4178 rhs = vect_recog_temp_ssa_var (type, NULL);
4179 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4180 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4183 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4184 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4186 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4187 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4188 append_pattern_def_seq (stmt_vinfo, cast_stmt);
4189 rhs = rhs2;
4191 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4192 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4193 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4194 *type_out = vectype;
4195 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4197 return pattern_stmt;
4199 else
4200 return NULL;
4204 /* A helper for vect_recog_mask_conversion_pattern. Build
4205 conversion of MASK to a type suitable for masking VECTYPE.
4206 The built statement gets the required vectype and is appended to
4207 STMT_VINFO's pattern sequence.
4209 Return converted mask. */
4211 static tree
4212 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4214 gimple *stmt;
4215 tree masktype, tmp;
4217 masktype = truth_type_for (vectype);
4218 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4219 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4220 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
4222 return tmp;
4226 /* Function vect_recog_mask_conversion_pattern
4228 Try to find statements which require boolean type
4229 conversion. Additional conversion statements are
4230 added to handle such cases. For example:
4232 bool m_1, m_2, m_3;
4233 int i_4, i_5;
4234 double d_6, d_7;
4235 char c_1, c_2, c_3;
4237 S1 m_1 = i_4 > i_5;
4238 S2 m_2 = d_6 < d_7;
4239 S3 m_3 = m_1 & m_2;
4240 S4 c_1 = m_3 ? c_2 : c_3;
4242 Will be transformed into:
4244 S1 m_1 = i_4 > i_5;
4245 S2 m_2 = d_6 < d_7;
4246 S3'' m_2' = (_Bool[bitsize=32])m_2
4247 S3' m_3' = m_1 & m_2';
4248 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4249 S4' c_1' = m_3'' ? c_2 : c_3; */
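/* A (hypothetical) scalar source for the example above, mixing comparison
   widths:

     void
     f (int *i4, int *i5, double *d6, double *d7,
        char *c1, char *c2, char *c3, int n)
     {
       for (int k = 0; k < n; k++)
         c1[k] = (i4[k] > i5[k] && d6[k] < d7[k]) ? c2[k] : c3[k];
     }

   The int and double comparisons yield masks with different numbers of
   elements per vector, and the selection is on char elements, so the
   conversions S3'' and S4'' shown above are inserted.  */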
4251 static gimple *
4252 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4254 gimple *last_stmt = stmt_vinfo->stmt;
4255 enum tree_code rhs_code;
4256 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4257 tree vectype1, vectype2;
4258 stmt_vec_info pattern_stmt_info;
4259 vec_info *vinfo = stmt_vinfo->vinfo;
4261 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4262 if (is_gimple_call (last_stmt)
4263 && gimple_call_internal_p (last_stmt))
4265 gcall *pattern_stmt;
4267 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4268 int mask_argno = internal_fn_mask_index (ifn);
4269 if (mask_argno < 0)
4270 return NULL;
4272 bool store_p = internal_store_fn_p (ifn);
4273 if (store_p)
4275 int rhs_index = internal_fn_stored_value_index (ifn);
4276 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4277 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4279 else
4281 lhs = gimple_call_lhs (last_stmt);
4282 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4285 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4286 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
4287 if (!mask_arg_type)
4288 return NULL;
4289 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4291 if (!vectype1 || !vectype2
4292 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4293 TYPE_VECTOR_SUBPARTS (vectype2)))
4294 return NULL;
4296 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
4298 auto_vec<tree, 8> args;
4299 unsigned int nargs = gimple_call_num_args (last_stmt);
4300 args.safe_grow (nargs);
4301 for (unsigned int i = 0; i < nargs; ++i)
4302 args[i] = ((int) i == mask_argno
4303 ? tmp
4304 : gimple_call_arg (last_stmt, i));
4305 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4307 if (!store_p)
4309 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4310 gimple_call_set_lhs (pattern_stmt, lhs);
4312 gimple_call_set_nothrow (pattern_stmt, true);
4314 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4315 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4316 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4318 *type_out = vectype1;
4319 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4321 return pattern_stmt;
4324 if (!is_gimple_assign (last_stmt))
4325 return NULL;
4327 gimple *pattern_stmt;
4328 lhs = gimple_assign_lhs (last_stmt);
4329 rhs1 = gimple_assign_rhs1 (last_stmt);
4330 rhs_code = gimple_assign_rhs_code (last_stmt);
4332 /* Check for cond expression requiring mask conversion. */
4333 if (rhs_code == COND_EXPR)
4335 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4337 if (TREE_CODE (rhs1) == SSA_NAME)
4339 rhs1_type = search_type_for_mask (rhs1, vinfo);
4340 if (!rhs1_type)
4341 return NULL;
4343 else if (COMPARISON_CLASS_P (rhs1))
4345 /* Check whether we're comparing scalar booleans and (if so)
4346 whether a better mask type exists than the mask associated
4347 with boolean-sized elements. This avoids unnecessary packs
4348 and unpacks if the booleans are set from comparisons of
4349 wider types. E.g. in:
4351 int x1, x2, x3, x4, y1, y2;
4353 bool b1 = (x1 == x2);
4354 bool b2 = (x3 == x4);
4355 ... = b1 == b2 ? y1 : y2;
4357 it is better for b1 and b2 to use the mask type associated
4358 with int elements rather than bool (byte) elements. */
4359 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4360 if (!rhs1_type)
4361 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4363 else
4364 return NULL;
4366 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4368 if (!vectype1 || !vectype2)
4369 return NULL;
4371 /* Continue if a conversion is needed. Also continue if we have
4372 a comparison whose vector type would normally be different from
4373 VECTYPE2 when considered in isolation. In that case we'll
4374 replace the comparison with an SSA name (so that we can record
4375 its vector type) and behave as though the comparison was an SSA
4376 name from the outset. */
4377 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4378 TYPE_VECTOR_SUBPARTS (vectype2))
4379 && (TREE_CODE (rhs1) == SSA_NAME
4380 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4381 return NULL;
4383 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4384 in place, we can handle it in vectorizable_condition. This avoids
4385 unnecessary promotion stmts and increased vectorization factor. */
4386 if (COMPARISON_CLASS_P (rhs1)
4387 && INTEGRAL_TYPE_P (rhs1_type)
4388 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4389 TYPE_VECTOR_SUBPARTS (vectype2)))
4391 enum vect_def_type dt;
4392 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4393 && dt == vect_external_def
4394 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4395 && (dt == vect_external_def
4396 || dt == vect_constant_def))
4398 tree wide_scalar_type = build_nonstandard_integer_type
4399 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4400 TYPE_UNSIGNED (rhs1_type));
4401 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4402 wide_scalar_type);
4403 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4404 return NULL;
4408 /* If rhs1 is a comparison we need to move it into a
4409 separate statement. */
4410 if (TREE_CODE (rhs1) != SSA_NAME)
4412 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4413 pattern_stmt = gimple_build_assign (tmp, rhs1);
4414 rhs1 = tmp;
4415 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4418 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4419 TYPE_VECTOR_SUBPARTS (vectype2)))
4420 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4421 else
4422 tmp = rhs1;
4424 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4425 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4426 gimple_assign_rhs2 (last_stmt),
4427 gimple_assign_rhs3 (last_stmt));
4429 *type_out = vectype1;
4430 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4432 return pattern_stmt;
4435 /* Now check for binary boolean operations requiring conversion for
4436 one of the operands. */
4437 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4438 return NULL;
4440 if (rhs_code != BIT_IOR_EXPR
4441 && rhs_code != BIT_XOR_EXPR
4442 && rhs_code != BIT_AND_EXPR
4443 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4444 return NULL;
4446 rhs2 = gimple_assign_rhs2 (last_stmt);
4448 rhs1_type = search_type_for_mask (rhs1, vinfo);
4449 rhs2_type = search_type_for_mask (rhs2, vinfo);
4451 if (!rhs1_type || !rhs2_type
4452 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4453 return NULL;
4455 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4457 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4458 if (!vectype1)
4459 return NULL;
4460 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4462 else
4464 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4465 if (!vectype1)
4466 return NULL;
4467 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4470 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4471 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4473 *type_out = vectype1;
4474 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4476 return pattern_stmt;
4479 /* STMT_INFO is a load or store. If the load or store is conditional, return
4480 the boolean condition under which it occurs, otherwise return null. */
4482 static tree
4483 vect_get_load_store_mask (stmt_vec_info stmt_info)
4485 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4487 gcc_assert (gimple_assign_single_p (def_assign));
4488 return NULL_TREE;
4491 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4493 internal_fn ifn = gimple_call_internal_fn (def_call);
4494 int mask_index = internal_fn_mask_index (ifn);
4495 return gimple_call_arg (def_call, mask_index);
4498 gcc_unreachable ();
4501 /* Return MASK if MASK is suitable for masking an operation on vectors
4502 of type VECTYPE, otherwise convert it into such a form and return
4503 the result. Associate any conversion statements with STMT_INFO's
4504 pattern. */
4506 static tree
4507 vect_convert_mask_for_vectype (tree mask, tree vectype,
4508 stmt_vec_info stmt_info, vec_info *vinfo)
4510 tree mask_type = search_type_for_mask (mask, vinfo);
4511 if (mask_type)
4513 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4514 if (mask_vectype
4515 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4516 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4517 mask = build_mask_conversion (mask, vectype, stmt_info);
4519 return mask;
4522 /* Return the equivalent of:
4524 fold_convert (TYPE, VALUE)
4526 with the expectation that the operation will be vectorized.
4527 If new statements are needed, add them as pattern statements
4528 to STMT_INFO. */
4530 static tree
4531 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4533 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4534 return value;
4536 vec_info *vinfo = stmt_info->vinfo;
4537 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4538 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4539 append_pattern_def_seq (stmt_info, conversion,
4540 get_vectype_for_scalar_type (vinfo, type));
4541 return new_value;
4544 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4545 internal function. Return the final statement on success and set
4546 *TYPE_OUT to the vector type being loaded or stored.
4548 This function only handles gathers and scatters that were recognized
4549 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
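/* A (hypothetical) indexed load that is typically recognized as a gather:

     void
     f (double *out, double *base, int *idx, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = base[idx[i]];
     }

   When the target provides a suitable internal function (IFN_GATHER_LOAD,
   or IFN_MASK_GATHER_LOAD if the access is conditional), the load is
   replaced by a call taking the invariant base, the offset vector and
   the scale, as built below.  */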
4551 static gimple *
4552 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4554 /* Currently we only support this for loop vectorization. */
4555 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4556 if (!loop_vinfo)
4557 return NULL;
4559 /* Make sure that we're looking at a gather load or scatter store. */
4560 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4561 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4562 return NULL;
4564 /* Get the boolean that controls whether the load or store happens.
4565 This is null if the operation is unconditional. */
4566 tree mask = vect_get_load_store_mask (stmt_info);
4568 /* Make sure that the target supports an appropriate internal
4569 function for the gather/scatter operation. */
4570 gather_scatter_info gs_info;
4571 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4572 || gs_info.decl)
4573 return NULL;
4575 /* Convert the mask to the right form. */
4576 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4577 gs_info.element_type);
4578 if (mask)
4579 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4580 loop_vinfo);
4582 /* Get the invariant base and non-invariant offset, converting the
4583 latter to the same width as the vector elements. */
4584 tree base = gs_info.base;
4585 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4586 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4587 stmt_info);
4589 /* Build the new pattern statement. */
4590 tree scale = size_int (gs_info.scale);
4591 gcall *pattern_stmt;
4592 if (DR_IS_READ (dr))
4594 tree zero = build_zero_cst (gs_info.element_type);
4595 if (mask != NULL)
4596 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4597 offset, scale, zero, mask);
4598 else
4599 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4600 offset, scale, zero);
4601 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4602 gimple_call_set_lhs (pattern_stmt, load_lhs);
4604 else
4606 tree rhs = vect_get_store_rhs (stmt_info);
4607 if (mask != NULL)
4608 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4609 base, offset, scale, rhs,
4610 mask);
4611 else
4612 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4613 base, offset, scale, rhs);
4615 gimple_call_set_nothrow (pattern_stmt, true);
4617 /* Copy across relevant vectorization info and associate DR with the
4618 new pattern statement instead of the original statement. */
4619 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4620 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4622 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4623 *type_out = vectype;
4624 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4626 return pattern_stmt;
4629 /* Return true if TYPE is a non-boolean integer type. These are the types
4630 that we want to consider for narrowing. */
4632 static bool
4633 vect_narrowable_type_p (tree type)
4635 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4638 /* Return true if the operation given by CODE can be truncated to N bits
4639 when only N bits of the output are needed. This is only true if bit N+1
4640 of the inputs has no effect on the low N bits of the result. */
4642 static bool
4643 vect_truncatable_operation_p (tree_code code)
4645 switch (code)
4647 case PLUS_EXPR:
4648 case MINUS_EXPR:
4649 case MULT_EXPR:
4650 case BIT_AND_EXPR:
4651 case BIT_IOR_EXPR:
4652 case BIT_XOR_EXPR:
4653 case COND_EXPR:
4654 return true;
4656 default:
4657 return false;
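/* For instance, with 8-bit truncation PLUS_EXPR satisfies

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   for all unsigned A and B, so additions can be narrowed when only the
   low 8 bits of the result are needed.  No such identity holds for e.g.
   TRUNC_DIV_EXPR or RSHIFT_EXPR; shifts are instead handled explicitly
   in vect_determine_precisions_from_users.  */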
4661 /* Record that STMT_INFO could be changed from operating on TYPE to
4662 operating on a type with the precision and sign given by PRECISION
4663 and SIGN respectively. PRECISION is an arbitrary bit precision;
4664 it might not be a whole number of bytes. */
4666 static void
4667 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4668 unsigned int precision, signop sign)
4670 /* Round the precision up to a whole number of bytes. */
4671 precision = vect_element_precision (precision);
4672 if (precision < TYPE_PRECISION (type)
4673 && (!stmt_info->operation_precision
4674 || stmt_info->operation_precision > precision))
4676 stmt_info->operation_precision = precision;
4677 stmt_info->operation_sign = sign;
4681 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4682 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4683 is an arbitrary bit precision; it might not be a whole number of bytes. */
4685 static void
4686 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4687 unsigned int min_input_precision)
4689 /* This operation in isolation only requires the inputs to have
4690 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4691 that MIN_INPUT_PRECISION is a natural precision for the chain
4692 as a whole. E.g. consider something like:
4694 unsigned short *x, *y;
4695 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4697 The right shift can be done on unsigned chars, and only requires the
4698 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4699 approach would mean turning a natural chain of single-vector unsigned
4700 short operations into one that truncates "*x" and then extends
4701 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4702 operation and one vector for each unsigned char operation.
4703 This would be a significant pessimization.
4705 Instead only propagate the maximum of this precision and the precision
4706 required by the users of the result. This means that we don't pessimize
4707 the case above but continue to optimize things like:
4709 unsigned char *y;
4710 unsigned short *x;
4711 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4713 Here we would truncate two vectors of *x to a single vector of
4714 unsigned chars and use single-vector unsigned char operations for
4715 everything else, rather than doing two unsigned short copies of
4716 "(*x & 0xf0) >> 4" and then truncating the result. */
4717 min_input_precision = MAX (min_input_precision,
4718 stmt_info->min_output_precision);
4720 if (min_input_precision < TYPE_PRECISION (type)
4721 && (!stmt_info->min_input_precision
4722 || stmt_info->min_input_precision > min_input_precision))
4723 stmt_info->min_input_precision = min_input_precision;
4726 /* Subroutine of vect_determine_min_output_precision. Return true if
4727 we can calculate a reduced number of output bits for STMT_INFO,
4728 whose result is LHS. */
4730 static bool
4731 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4733 /* Take the maximum precision required by users of the result. */
4734 vec_info *vinfo = stmt_info->vinfo;
4735 unsigned int precision = 0;
4736 imm_use_iterator iter;
4737 use_operand_p use;
4738 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4740 gimple *use_stmt = USE_STMT (use);
4741 if (is_gimple_debug (use_stmt))
4742 continue;
4743 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4744 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4745 return false;
4746 /* The input precision recorded for COND_EXPRs applies only to the
4747 "then" and "else" values. */
4748 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4749 if (assign
4750 && gimple_assign_rhs_code (assign) == COND_EXPR
4751 && use->use != gimple_assign_rhs2_ptr (assign)
4752 && use->use != gimple_assign_rhs3_ptr (assign))
4753 return false;
4754 precision = MAX (precision, use_stmt_info->min_input_precision);
4757 if (dump_enabled_p ())
4758 dump_printf_loc (MSG_NOTE, vect_location,
4759 "only the low %d bits of %T are significant\n",
4760 precision, lhs);
4761 stmt_info->min_output_precision = precision;
4762 return true;
4765 /* Calculate min_output_precision for STMT_INFO. */
4767 static void
4768 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4770 /* We're only interested in statements with a narrowable result. */
4771 tree lhs = gimple_get_lhs (stmt_info->stmt);
4772 if (!lhs
4773 || TREE_CODE (lhs) != SSA_NAME
4774 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4775 return;
4777 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4778 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4781 /* Use range information to decide whether STMT (described by STMT_INFO)
4782 could be done in a narrower type. This is effectively a forward
4783 propagation, since it uses context-independent information that applies
4784 to all users of an SSA name. */
4786 static void
4787 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4789 tree lhs = gimple_assign_lhs (stmt);
4790 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4791 return;
4793 tree type = TREE_TYPE (lhs);
4794 if (!vect_narrowable_type_p (type))
4795 return;
4797 /* First see whether we have any useful range information for the result. */
4798 unsigned int precision = TYPE_PRECISION (type);
4799 signop sign = TYPE_SIGN (type);
4800 wide_int min_value, max_value;
4801 if (!vect_get_range_info (lhs, &min_value, &max_value))
4802 return;
4804 tree_code code = gimple_assign_rhs_code (stmt);
4805 unsigned int nops = gimple_num_ops (stmt);
4807 if (!vect_truncatable_operation_p (code))
4808 /* Check that all relevant input operands are compatible, and update
4809 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4810 for (unsigned int i = 1; i < nops; ++i)
4812 tree op = gimple_op (stmt, i);
4813 if (TREE_CODE (op) == INTEGER_CST)
4815 /* Don't require the integer to have RHS_TYPE (which it might
4816 not for things like shift amounts, etc.), but do require it
4817 to fit the type. */
4818 if (!int_fits_type_p (op, type))
4819 return;
4821 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4822 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4824 else if (TREE_CODE (op) == SSA_NAME)
4826 /* Ignore codes that don't take uniform arguments. */
4827 if (!types_compatible_p (TREE_TYPE (op), type))
4828 return;
4830 wide_int op_min_value, op_max_value;
4831 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4832 return;
4834 min_value = wi::min (min_value, op_min_value, sign);
4835 max_value = wi::max (max_value, op_max_value, sign);
4837 else
4838 return;
4841 /* Try to switch signed types for unsigned types if we can.
4842 This is better for two reasons. First, unsigned ops tend
4843 to be cheaper than signed ops. Second, it means that we can
4844 handle things like:
4846 signed char c;
4847 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4851 signed char c;
4852 unsigned short res_1 = (unsigned short) c & 0xff00;
4853 int res = (int) res_1;
4855 where the intermediate result res_1 has unsigned rather than
4856 signed type. */
4857 if (sign == SIGNED && !wi::neg_p (min_value))
4858 sign = UNSIGNED;
4860 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4861 unsigned int precision1 = wi::min_precision (min_value, sign);
4862 unsigned int precision2 = wi::min_precision (max_value, sign);
4863 unsigned int value_precision = MAX (precision1, precision2);
4864 if (value_precision >= precision)
4865 return;
4867 if (dump_enabled_p ())
4868 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4869 " without loss of precision: %G",
4870 sign == SIGNED ? "signed" : "unsigned",
4871 value_precision, stmt);
4873 vect_set_operation_type (stmt_info, type, value_precision, sign);
4874 vect_set_min_input_precision (stmt_info, type, value_precision);
4877 /* Use information about the users of STMT's result to decide whether
4878 STMT (described by STMT_INFO) could be done in a narrower type.
4879 This is effectively a backward propagation. */
4881 static void
4882 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4884 tree_code code = gimple_assign_rhs_code (stmt);
4885 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4886 tree type = TREE_TYPE (gimple_op (stmt, opno));
4887 if (!vect_narrowable_type_p (type))
4888 return;
4890 unsigned int precision = TYPE_PRECISION (type);
4891 unsigned int operation_precision, min_input_precision;
4892 switch (code)
4894 CASE_CONVERT:
4895 /* Only the bits that contribute to the output matter. Don't change
4896 the precision of the operation itself. */
4897 operation_precision = precision;
4898 min_input_precision = stmt_info->min_output_precision;
4899 break;
4901 case LSHIFT_EXPR:
4902 case RSHIFT_EXPR:
4904 tree shift = gimple_assign_rhs2 (stmt);
4905 if (TREE_CODE (shift) != INTEGER_CST
4906 || !wi::ltu_p (wi::to_widest (shift), precision))
4907 return;
4908 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4909 if (code == LSHIFT_EXPR)
4911 /* We need CONST_SHIFT fewer bits of the input. */
4912 operation_precision = stmt_info->min_output_precision;
4913 min_input_precision = (MAX (operation_precision, const_shift)
4914 - const_shift);
4916 else
4918 /* We need CONST_SHIFT extra bits to do the operation. */
4919 operation_precision = (stmt_info->min_output_precision
4920 + const_shift);
4921 min_input_precision = operation_precision;
4923 break;
4926 default:
4927 if (vect_truncatable_operation_p (code))
4929 /* Input bit N has no effect on output bits N-1 and lower. */
4930 operation_precision = stmt_info->min_output_precision;
4931 min_input_precision = operation_precision;
4932 break;
4934 return;
4937 if (operation_precision < precision)
4939 if (dump_enabled_p ())
4940 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4941 " without affecting users: %G",
4942 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4943 operation_precision, stmt);
4944 vect_set_operation_type (stmt_info, type, operation_precision,
4945 TYPE_SIGN (type));
4947 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4950 /* Handle vect_determine_precisions for STMT_INFO, given that we
4951 have already done so for the users of its result. */
4953 void
4954 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4956 vect_determine_min_output_precision (stmt_info);
4957 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4959 vect_determine_precisions_from_range (stmt_info, stmt);
4960 vect_determine_precisions_from_users (stmt_info, stmt);
4964 /* Walk backwards through the vectorizable region to determine the
4965 values of these fields:
4967 - min_output_precision
4968 - min_input_precision
4969 - operation_precision
4970 - operation_sign. */
4972 void
4973 vect_determine_precisions (vec_info *vinfo)
4975 DUMP_VECT_SCOPE ("vect_determine_precisions");
4977 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4979 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4980 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4981 unsigned int nbbs = loop->num_nodes;
4983 for (unsigned int i = 0; i < nbbs; i++)
4985 basic_block bb = bbs[nbbs - i - 1];
4986 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4987 !gsi_end_p (si); gsi_prev (&si))
4988 vect_determine_stmt_precisions
4989 (vinfo->lookup_stmt (gsi_stmt (si)));
4992 else
4994 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4995 gimple_stmt_iterator si = bb_vinfo->region_end;
4996 gimple *stmt;
4999 if (!gsi_stmt (si))
5000 si = gsi_last_bb (bb_vinfo->bb);
5001 else
5002 gsi_prev (&si);
5003 stmt = gsi_stmt (si);
5004 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
5005 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5006 vect_determine_stmt_precisions (stmt_info);
5008 while (stmt != gsi_stmt (bb_vinfo->region_begin));
5012 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
5014 struct vect_recog_func
5016 vect_recog_func_ptr fn;
5017 const char *name;
5020 /* Note that ordering matters - the first pattern matching on a stmt is
5021 taken, which means that usually the more complex one needs to precede the
5022 less complex ones (widen_sum only after dot_prod or sad, for example). */
5023 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5024 { vect_recog_over_widening_pattern, "over_widening" },
5025 /* Must come after over_widening, which narrows the shift as much as
5026 possible beforehand. */
5027 { vect_recog_average_pattern, "average" },
5028 { vect_recog_mulhs_pattern, "mult_high" },
5029 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5030 { vect_recog_widen_mult_pattern, "widen_mult" },
5031 { vect_recog_dot_prod_pattern, "dot_prod" },
5032 { vect_recog_sad_pattern, "sad" },
5033 { vect_recog_widen_sum_pattern, "widen_sum" },
5034 { vect_recog_pow_pattern, "pow" },
5035 { vect_recog_widen_shift_pattern, "widen_shift" },
5036 { vect_recog_rotate_pattern, "rotate" },
5037 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5038 { vect_recog_divmod_pattern, "divmod" },
5039 { vect_recog_mult_pattern, "mult" },
5040 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5041 { vect_recog_bool_pattern, "bool" },
5042 /* This must come before mask conversion, and includes the parts
5043 of mask conversion that are needed for gather and scatter
5044 internal functions. */
5045 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5046 { vect_recog_mask_conversion_pattern, "mask_conversion" }
5049 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5051 /* Mark statements that are involved in a pattern. */
5053 static inline void
5054 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5055 tree pattern_vectype)
5057 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5058 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5060 gimple *orig_pattern_stmt = NULL;
5061 if (is_pattern_stmt_p (orig_stmt_info))
5063 /* We're replacing a statement in an existing pattern definition
5064 sequence. */
5065 orig_pattern_stmt = orig_stmt_info->stmt;
5066 if (dump_enabled_p ())
5067 dump_printf_loc (MSG_NOTE, vect_location,
5068 "replacing earlier pattern %G", orig_pattern_stmt);
5070 /* To keep the book-keeping simple, just swap the lhs of the
5071 old and new statements, so that the old one has a valid but
5072 unused lhs. */
5073 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5074 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5075 gimple_set_lhs (pattern_stmt, old_lhs);
5077 if (dump_enabled_p ())
5078 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5080 /* Switch to the statement that ORIG replaces. */
5081 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5083 /* We shouldn't be replacing the main pattern statement. */
5084 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5085 != orig_pattern_stmt);
5088 if (def_seq)
5089 for (gimple_stmt_iterator si = gsi_start (def_seq);
5090 !gsi_end_p (si); gsi_next (&si))
5092 if (dump_enabled_p ())
5093 dump_printf_loc (MSG_NOTE, vect_location,
5094 "extra pattern stmt: %G", gsi_stmt (si));
5095 stmt_vec_info pattern_stmt_info
5096 = vect_init_pattern_stmt (gsi_stmt (si),
5097 orig_stmt_info, pattern_vectype);
5098 /* Stmts in the def sequence are not vectorizable cycle or
5099 induction defs; instead they should all be vect_internal_def
5100 feeding the main pattern stmt which retains this def type. */
5101 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5104 if (orig_pattern_stmt)
5106 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5108 /* Insert all the new pattern statements before the original one. */
5109 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5110 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5111 orig_def_seq);
5112 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5113 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5115 /* Remove the pattern statement that this new pattern replaces. */
5116 gsi_remove (&gsi, false);
5118 else
5119 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5121 /* Transfer reduction path info to the pattern. */
5122 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5124 vec_info *vinfo = orig_stmt_info_saved->vinfo;
5125 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5126 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5127 /* Search the pattern def sequence and the main pattern stmt. Note
5128 we may have inserted all into a containing pattern def sequence
5129 so the following is a bit awkward. */
5130 gimple_stmt_iterator si;
5131 gimple *s;
5132 if (def_seq)
5134 si = gsi_start (def_seq);
5135 s = gsi_stmt (si);
5136 gsi_next (&si);
5138 else
5140 si = gsi_none ();
5141 s = pattern_stmt;
5145 bool found = false;
5146 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5147 if (gimple_op (s, i) == lookfor)
5149 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5150 lookfor = gimple_get_lhs (s);
5151 found = true;
5152 break;
5154 if (s == pattern_stmt)
5156 if (!found && dump_enabled_p ())
5157 dump_printf_loc (MSG_NOTE, vect_location,
5158 "failed to update reduction index.\n");
5159 break;
5161 if (gsi_end_p (si))
5162 s = pattern_stmt;
5163 else
5165 s = gsi_stmt (si);
5166 if (s == pattern_stmt)
5167 /* Found the end inside a bigger pattern def seq. */
5168 si = gsi_none ();
5169 else
5170 gsi_next (&si);
5172 } while (1);
5176 /* Function vect_pattern_recog_1
5178 Input:
5179 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5180 computation pattern.
5181 STMT_INFO: A stmt from which the pattern search should start.
5183 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5184 a sequence of statements that has the same functionality and can be
5185 used to replace STMT_INFO. It returns the last statement in the sequence
5186 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5187 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5188 statement, having first checked that the target supports the new operation
5189 in that type.
5191 This function also does some bookkeeping, as explained in the documentation
5192 for vect_pattern_recog. */
5194 static void
5195 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
5197 vec_info *vinfo = stmt_info->vinfo;
5198 gimple *pattern_stmt;
5199 loop_vec_info loop_vinfo;
5200 tree pattern_vectype;
5202 /* If this statement has already been replaced with pattern statements,
5203 leave the original statement alone, since the first match wins.
5204 Instead try to match against the definition statements that feed
5205 the main pattern statement. */
5206 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5208 gimple_stmt_iterator gsi;
5209 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5210 !gsi_end_p (gsi); gsi_next (&gsi))
5211 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
5212 return;
5215 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5216 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
5217 if (!pattern_stmt)
5219 /* Clear any half-formed pattern definition sequence. */
5220 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5221 return;
5224 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5225 gcc_assert (pattern_vectype);
5227 /* Found a vectorizable pattern. */
5228 if (dump_enabled_p ())
5229 dump_printf_loc (MSG_NOTE, vect_location,
5230 "%s pattern recognized: %G",
5231 recog_func->name, pattern_stmt);
5233 /* Mark the stmts that are involved in the pattern. */
5234 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
5236 /* Patterns cannot be vectorized using SLP, because they change the order of
5237 computation. */
5238 if (loop_vinfo)
5240 unsigned ix, ix2;
5241 stmt_vec_info *elem_ptr;
5242 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5243 elem_ptr, *elem_ptr == stmt_info);
5248 /* Function vect_pattern_recog
5250 Input:
5251 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
5252 computation idioms.
5254 Output - for each computation idiom that is detected we create a new stmt
5255 that provides the same functionality and that can be vectorized. We
5256 also record some information in the stmt_vec_info of the relevant
5257 stmts, as explained below:
5259 At the entry to this function we have the following stmts, with the
5260 following initial value in the STMT_VINFO fields:
5262 stmt in_pattern_p related_stmt vec_stmt
5263 S1: a_i = .... - - -
5264 S2: a_2 = ..use(a_i).. - - -
5265 S3: a_1 = ..use(a_2).. - - -
5266 S4: a_0 = ..use(a_1).. - - -
5267 S5: ... = ..use(a_0).. - - -
5269 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5270 represented by a single stmt. We then:
5271 - create a new stmt S6 equivalent to the pattern (the stmt is not
5272 inserted into the code)
5273 - fill in the STMT_VINFO fields as follows:
5275 in_pattern_p related_stmt vec_stmt
5276 S1: a_i = .... - - -
5277 S2: a_2 = ..use(a_i).. - - -
5278 S3: a_1 = ..use(a_2).. - - -
5279 S4: a_0 = ..use(a_1).. true S6 -
5280 '---> S6: a_new = .... - S4 -
5281 S5: ... = ..use(a_0).. - - -
5283 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5284 to each other through the RELATED_STMT field).
5286 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5287 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5288 remain irrelevant unless used by stmts other than S4.
5290 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5291 (because they are marked as irrelevant). It will vectorize S6, and record
5292 a pointer to the new vector stmt VS6 from S6 (as usual).
5293 S4 will be skipped, and S5 will be vectorized as usual:
5295 in_pattern_p related_stmt vec_stmt
5296 S1: a_i = .... - - -
5297 S2: a_2 = ..use(a_i).. - - -
5298 S3: a_1 = ..use(a_2).. - - -
5299 > VS6: va_new = .... - - -
5300 S4: a_0 = ..use(a_1).. true S6 VS6
5301 '---> S6: a_new = .... - S4 VS6
5302 > VS5: ... = ..vuse(va_new).. - - -
5303 S5: ... = ..use(a_0).. - - -
5305 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5306 elsewhere), and we'll end up with:
5308 VS6: va_new = ....
5309 VS5: ... = ..vuse(va_new)..
5311 In case of more than one pattern statements, e.g., widen-mult with
5312 intermediate type:
5314 S1 a_t = ;
5315 S2 a_T = (TYPE) a_t;
5316 '--> S3: a_it = (interm_type) a_t;
5317 S4 prod_T = a_T * CONST;
5318 '--> S5: prod_T' = a_it w* CONST;
5320 there may be other users of a_T outside the pattern. In that case S2 will
5321 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5322 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5323 be recorded in S3. */
5325 void
5326 vect_pattern_recog (vec_info *vinfo)
5328 class loop *loop;
5329 basic_block *bbs;
5330 unsigned int nbbs;
5331 gimple_stmt_iterator si;
5332 unsigned int i, j;
5334 vect_determine_precisions (vinfo);
5336 DUMP_VECT_SCOPE ("vect_pattern_recog");
5338 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5340 loop = LOOP_VINFO_LOOP (loop_vinfo);
5341 bbs = LOOP_VINFO_BBS (loop_vinfo);
5342 nbbs = loop->num_nodes;
5344 /* Scan through the loop stmts, applying the pattern recognition
5345 functions starting at each stmt visited: */
5346 for (i = 0; i < nbbs; i++)
5348 basic_block bb = bbs[i];
5349 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5351 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5352 /* Scan over all generic vect_recog_xxx_pattern functions. */
5353 for (j = 0; j < NUM_PATTERNS; j++)
5354 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
5355 stmt_info);
5359 else
5361 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5362 for (si = bb_vinfo->region_begin;
5363 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5365 gimple *stmt = gsi_stmt (si);
5366 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
5367 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5368 continue;
5370 /* Scan over all generic vect_recog_xxx_pattern functions. */
5371 for (j = 0; j < NUM_PATTERNS; j++)
5372 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);