[official-gcc.git] / gcc / tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
56 value_range_type vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
71 return true;
73 else
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
80 return false;
84 /* Report that we've found an instance of pattern PATTERN in
85 statement STMT. */
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
93 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
98 return the pattern statement's stmt_vec_info. Set its vector type to
99 VECTYPE if it doesn't have one already. */
101 static stmt_vec_info
102 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
103 tree vectype)
105 vec_info *vinfo = orig_stmt_info->vinfo;
106 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
107 if (pattern_stmt_info == NULL_STMT_VEC_INFO)
108 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
109 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
111 pattern_stmt_info->pattern_stmt_p = true;
112 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
113 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
114 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
115 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
116 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
117 return pattern_stmt_info;
120 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
121 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
122 have one already. */
124 static void
125 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
126 tree vectype)
128 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
129 STMT_VINFO_RELATED_STMT (orig_stmt_info)
130 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
133 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
134 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
135 be different from the vector type of the final pattern statement. */
137 static inline void
138 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
139 tree vectype = NULL_TREE)
141 vec_info *vinfo = stmt_info->vinfo;
142 if (vectype)
144 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
145 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
147 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
148 new_stmt);
151 /* The caller wants to perform new operations on vect_external variable
152 VAR, so that the result of the operations would also be vect_external.
153 Return the edge on which the operations can be performed, if one exists.
154 Return null if the operations should instead be treated as part of
155 the pattern that needs them. */
157 static edge
158 vect_get_external_def_edge (vec_info *vinfo, tree var)
160 edge e = NULL;
161 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
163 e = loop_preheader_edge (loop_vinfo->loop);
164 if (!SSA_NAME_IS_DEFAULT_DEF (var))
166 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
167 if (bb == NULL
168 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
169 e = NULL;
172 return e;
175 /* Return true if the target supports a vector version of CODE,
176 where CODE is known to map to a direct optab. ITYPE specifies
177 the type of (some of) the scalar inputs and OTYPE specifies the
178 type of the scalar result.
180 If CODE allows the inputs and outputs to have different type
181 (such as for WIDEN_SUM_EXPR), it is the input mode rather
182 than the output mode that determines the appropriate target pattern.
183 Operand 0 of the target pattern then specifies the mode that the output
184 must have.
186 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
187 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
188 is nonnull. */
190 static bool
191 vect_supportable_direct_optab_p (tree otype, tree_code code,
192 tree itype, tree *vecotype_out,
193 tree *vecitype_out = NULL)
195 tree vecitype = get_vectype_for_scalar_type (itype);
196 if (!vecitype)
197 return false;
199 tree vecotype = get_vectype_for_scalar_type (otype);
200 if (!vecotype)
201 return false;
203 optab optab = optab_for_tree_code (code, vecitype, optab_default);
204 if (!optab)
205 return false;
207 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
208 if (icode == CODE_FOR_nothing
209 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
210 return false;
212 *vecotype_out = vecotype;
213 if (vecitype_out)
214 *vecitype_out = vecitype;
215 return true;
218 /* Round bit precision PRECISION up to a full element. */
220 static unsigned int
221 vect_element_precision (unsigned int precision)
223 precision = 1 << ceil_log2 (precision);
224 return MAX (precision, BITS_PER_UNIT);
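/* Illustrative note, not part of the original source: with the usual
   BITS_PER_UNIT of 8, the function above maps 1..8 to 8, 9..16 to 16 and
   17..32 to 32, i.e. a value that needs 17 bits is treated as occupying
   a full 32-bit element.  */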
227 /* If OP is defined by a statement that's being considered for vectorization,
228 return information about that statement, otherwise return NULL. */
230 static stmt_vec_info
231 vect_get_internal_def (vec_info *vinfo, tree op)
233 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
234 if (def_stmt_info
235 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
236 return def_stmt_info;
237 return NULL;
240 /* Check whether NAME, an ssa-name used in STMT_VINFO,
241 is a result of a type promotion, such that:
242 DEF_STMT: NAME = NOP (name0)
243 If CHECK_SIGN is TRUE, check that either both types are signed or both are
244 unsigned. */
246 static bool
247 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
248 tree *orig_type, gimple **def_stmt, bool *promotion)
250 tree type = TREE_TYPE (name);
251 tree oprnd0;
252 enum vect_def_type dt;
254 stmt_vec_info def_stmt_info;
255 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
256 def_stmt))
257 return false;
259 if (dt != vect_internal_def
260 && dt != vect_external_def && dt != vect_constant_def)
261 return false;
263 if (!*def_stmt)
264 return false;
266 if (!is_gimple_assign (*def_stmt))
267 return false;
269 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
270 return false;
272 oprnd0 = gimple_assign_rhs1 (*def_stmt);
274 *orig_type = TREE_TYPE (oprnd0);
275 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
276 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
277 return false;
279 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
280 *promotion = true;
281 else
282 *promotion = false;
284 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
285 return false;
287 return true;
290 /* Holds information about an input operand after some sign changes
291 and type promotions have been peeled away. */
292 struct vect_unpromoted_value {
293 vect_unpromoted_value ();
295 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
297 /* The value obtained after peeling away zero or more casts. */
298 tree op;
300 /* The type of OP. */
301 tree type;
303 /* The definition type of OP. */
304 vect_def_type dt;
306 /* If OP is the result of peeling at least one cast, and if the cast
307 of OP itself is a vectorizable statement, CASTER identifies that
308 statement, otherwise it is null. */
309 stmt_vec_info caster;
312 inline vect_unpromoted_value::vect_unpromoted_value ()
313 : op (NULL_TREE),
314 type (NULL_TREE),
315 dt (vect_uninitialized_def),
316 caster (NULL)
320 /* Set the operand to OP_IN, its definition type to DT_IN, and the
321 statement that casts it to CASTER_IN. */
323 inline void
324 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
325 stmt_vec_info caster_in)
327 op = op_in;
328 type = TREE_TYPE (op);
329 dt = dt_in;
330 caster = caster_in;
333 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
334 to reach some vectorizable inner operand OP', continuing as long as it
335 is possible to convert OP' back to OP using a possible sign change
336 followed by a possible promotion P. Return this OP', or null if OP is
337 not a vectorizable SSA name. If there is a promotion P, describe its
338 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
339 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
340 have more than one user.
342 A successful return means that it is possible to go from OP' to OP
343 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
344 whereas the cast from UNPROM to OP might be a promotion, a sign
345 change, or a nop.
347 E.g. say we have:
349 signed short *ptr = ...;
350 signed short C = *ptr;
351 unsigned short B = (unsigned short) C; // sign change
352 signed int A = (signed int) B; // unsigned promotion
353 ...possible other uses of A...
354 unsigned int OP = (unsigned int) A; // sign change
356 In this case it's possible to go directly from C to OP using:
358 OP = (unsigned int) (unsigned short) C;
359 +------------+ +--------------+
360 promotion sign change
362 so OP' would be C. The input to the promotion is B, so UNPROM
363 would describe B. */
365 static tree
366 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
367 vect_unpromoted_value *unprom,
368 bool *single_use_p = NULL)
370 tree res = NULL_TREE;
371 tree op_type = TREE_TYPE (op);
372 unsigned int orig_precision = TYPE_PRECISION (op_type);
373 stmt_vec_info caster = NULL;
374 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
376 /* See whether OP is simple enough to vectorize. */
377 stmt_vec_info def_stmt_info;
378 gimple *def_stmt;
379 vect_def_type dt;
380 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
381 break;
383 /* If OP is the input of a demotion, skip over it to see whether
384 OP is itself the result of a promotion. If so, the combined
385 effect of the promotion and the demotion might fit the required
386 pattern, otherwise neither operation fits.
388 This copes with cases such as the result of an arithmetic
389 operation being truncated before being stored, and where that
390 arithmetic operation has been recognized as an over-widened one. */
391 if (TYPE_PRECISION (op_type) <= orig_precision)
393 /* Use OP as the UNPROM described above if we haven't yet
394 found a promotion, or if using the new input preserves the
395 sign of the previous promotion. */
396 if (!res
397 || TYPE_PRECISION (unprom->type) == orig_precision
398 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
399 unprom->set_op (op, dt, caster);
400 /* Stop if we've already seen a promotion and if this
401 conversion does more than change the sign. */
402 else if (TYPE_PRECISION (op_type)
403 != TYPE_PRECISION (unprom->type))
404 break;
406 /* The sequence now extends to OP. */
407 res = op;
410 /* See whether OP is defined by a cast. Record it as CASTER if
411 the cast is potentially vectorizable. */
412 if (!def_stmt)
413 break;
414 caster = def_stmt_info;
416 /* Ignore pattern statements, since we don't link uses for them. */
417 if (caster
418 && single_use_p
419 && !STMT_VINFO_RELATED_STMT (caster)
420 && !has_single_use (res))
421 *single_use_p = false;
423 gassign *assign = dyn_cast <gassign *> (def_stmt);
424 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
425 break;
427 /* Continue with the input to the cast. */
428 op = gimple_assign_rhs1 (def_stmt);
429 op_type = TREE_TYPE (op);
431 return res;
434 /* OP is an integer operand to an operation that returns TYPE, and we
435 want to treat the operation as a widening one. So far we can treat
436 it as widening from *COMMON_TYPE.
438 Return true if OP is suitable for such a widening operation,
439 either widening from *COMMON_TYPE or from some supertype of it.
440 Update *COMMON_TYPE to the supertype in the latter case.
442 SHIFT_P is true if OP is a shift amount. */
444 static bool
445 vect_joust_widened_integer (tree type, bool shift_p, tree op,
446 tree *common_type)
448 /* Calculate the minimum precision required by OP, without changing
449 the sign of either operand. */
450 unsigned int precision;
451 if (shift_p)
453 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
454 return false;
455 precision = TREE_INT_CST_LOW (op);
457 else
459 precision = wi::min_precision (wi::to_widest (op),
460 TYPE_SIGN (*common_type));
461 if (precision * 2 > TYPE_PRECISION (type))
462 return false;
465 /* If OP requires a wider type, switch to that type. The checks
466 above ensure that this is still narrower than the result. */
467 precision = vect_element_precision (precision);
468 if (TYPE_PRECISION (*common_type) < precision)
469 *common_type = build_nonstandard_integer_type
470 (precision, TYPE_UNSIGNED (*common_type));
471 return true;
474 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
475 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
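/* Illustrative example, not part of the original source: if *COMMON_TYPE is
   unsigned char and NEW_TYPE is signed char, neither type can represent all
   values of the other, so the mismatched-sign case below applies and
   *COMMON_TYPE becomes a 16-bit signed type, provided TYPE has at least
   32 bits of precision; otherwise the function returns false.  */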
477 static bool
478 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
480 if (types_compatible_p (*common_type, new_type))
481 return true;
483 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
484 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
485 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
486 return true;
488 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
489 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
490 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
492 *common_type = new_type;
493 return true;
496 /* We have mismatched signs, with the signed type being
497 no wider than the unsigned type. In this case we need
498 a wider signed type. */
499 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
500 TYPE_PRECISION (new_type));
501 precision *= 2;
502 if (precision * 2 > TYPE_PRECISION (type))
503 return false;
505 *common_type = build_nonstandard_integer_type (precision, false);
506 return true;
509 /* Check whether STMT_INFO can be viewed as a tree of integer operations
510 in which each node either performs CODE or WIDENED_CODE, and where
511 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
512 specifies the maximum number of leaf operands. SHIFT_P says whether
513 CODE and WIDENED_CODE are some sort of shift.
515 If STMT_INFO is such a tree, return the number of leaf operands
516 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
517 to a type that (a) is narrower than the result of STMT_INFO and
518 (b) can hold all leaf operand values.
520 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
521 exists. */
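/* Illustrative example, not part of the original source: with CODE
   = PLUS_EXPR and MAX_NOPS = 3, a tree such as

     res = (int) a + ((int) b + (int) c);   /* a, b, c unsigned char.  */

   is accepted with the three leaf operands a, b and c described in
   UNPROM[0..2] and *COMMON_TYPE set to unsigned char, since every leaf
   is strictly narrower than the 32-bit result.  */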
523 static unsigned int
524 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
525 tree_code widened_code, bool shift_p,
526 unsigned int max_nops,
527 vect_unpromoted_value *unprom, tree *common_type)
529 /* Check for an integer operation with the right code. */
530 vec_info *vinfo = stmt_info->vinfo;
531 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
532 if (!assign)
533 return 0;
535 tree_code rhs_code = gimple_assign_rhs_code (assign);
536 if (rhs_code != code && rhs_code != widened_code)
537 return 0;
539 tree type = gimple_expr_type (assign);
540 if (!INTEGRAL_TYPE_P (type))
541 return 0;
543 /* Assume that both operands will be leaf operands. */
544 max_nops -= 2;
546 /* Check the operands. */
547 unsigned int next_op = 0;
548 for (unsigned int i = 0; i < 2; ++i)
550 vect_unpromoted_value *this_unprom = &unprom[next_op];
551 unsigned int nops = 1;
552 tree op = gimple_op (assign, i + 1);
553 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
555 /* We already have a common type from earlier operands.
556 Update it to account for OP. */
557 this_unprom->set_op (op, vect_constant_def);
558 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
559 return 0;
561 else
563 /* Only allow shifts by constants. */
564 if (shift_p && i == 1)
565 return 0;
567 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
568 this_unprom))
569 return 0;
571 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
573 /* The operand isn't widened. If STMT_INFO has the code
574 for an unwidened operation, recursively check whether
575 this operand is a node of the tree. */
576 if (rhs_code != code
577 || max_nops == 0
578 || this_unprom->dt != vect_internal_def)
579 return 0;
581 /* Give back the leaf slot allocated above now that we're
582 not treating this as a leaf operand. */
583 max_nops += 1;
585 /* Recursively process the definition of the operand. */
586 stmt_vec_info def_stmt_info
587 = vinfo->lookup_def (this_unprom->op);
588 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
589 shift_p, max_nops, this_unprom,
590 common_type);
591 if (nops == 0)
592 return 0;
594 max_nops -= nops;
596 else
598 /* Make sure that the operand is narrower than the result. */
599 if (TYPE_PRECISION (this_unprom->type) * 2
600 > TYPE_PRECISION (type))
601 return 0;
603 /* Update COMMON_TYPE for the new operand. */
604 if (i == 0)
605 *common_type = this_unprom->type;
606 else if (!vect_joust_widened_type (type, this_unprom->type,
607 common_type))
608 return 0;
611 next_op += nops;
613 return next_op;
616 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
617 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
619 static tree
620 vect_recog_temp_ssa_var (tree type, gimple *stmt)
622 return make_temp_ssa_name (type, stmt, "patt");
625 /* STMT2_INFO describes a type conversion that could be split into STMT1
626 followed by a version of STMT2_INFO that takes NEW_RHS as its first
627 input. Try to do this using pattern statements, returning true on
628 success. */
630 static bool
631 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
632 gimple *stmt1, tree vectype)
634 if (is_pattern_stmt_p (stmt2_info))
636 /* STMT2_INFO is part of a pattern. Get the statement to which
637 the pattern is attached. */
638 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
639 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
641 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "Splitting pattern statement: ");
645 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
648 /* Since STMT2_INFO is a pattern statement, we can change it
649 in-situ without worrying about changing the code for the
650 containing block. */
651 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
653 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE, vect_location, "into: ");
656 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
657 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
661 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
662 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
663 /* STMT2_INFO is the actual pattern statement. Add STMT1
664 to the end of the definition sequence. */
665 gimple_seq_add_stmt_without_update (def_seq, stmt1);
666 else
668 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
669 before it. */
670 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
671 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
673 return true;
675 else
677 /* STMT2_INFO doesn't yet have a pattern. Try to create a
678 two-statement pattern now. */
679 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
680 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
681 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
682 if (!lhs_vectype)
683 return false;
685 if (dump_enabled_p ())
687 dump_printf_loc (MSG_NOTE, vect_location,
688 "Splitting statement: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
692 /* Add STMT1 as a singleton pattern definition sequence. */
693 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
694 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
695 gimple_seq_add_stmt_without_update (def_seq, stmt1);
697 /* Build the second of the two pattern statements. */
698 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
699 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
700 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
702 if (dump_enabled_p ())
704 dump_printf_loc (MSG_NOTE, vect_location,
705 "into pattern statements: ");
706 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
707 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
708 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0);
711 return true;
715 /* Convert UNPROM to TYPE and return the result, adding new statements
716 to STMT_INFO's pattern definition statements if no better way is
717 available. VECTYPE is the vector form of TYPE. */
719 static tree
720 vect_convert_input (stmt_vec_info stmt_info, tree type,
721 vect_unpromoted_value *unprom, tree vectype)
723 /* Check for a no-op conversion. */
724 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
725 return unprom->op;
727 /* Allow the caller to create constant vect_unpromoted_values. */
728 if (TREE_CODE (unprom->op) == INTEGER_CST)
729 return wide_int_to_tree (type, wi::to_widest (unprom->op));
731 /* See if we can reuse an existing result. */
732 if (unprom->caster)
734 tree lhs = gimple_get_lhs (unprom->caster->stmt);
735 if (types_compatible_p (TREE_TYPE (lhs), type))
736 return lhs;
739 /* We need a new conversion statement. */
740 tree new_op = vect_recog_temp_ssa_var (type, NULL);
741 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
743 /* If the operation is the input to a vectorizable cast, try splitting
744 that cast into two, taking the required result as a mid-way point. */
745 if (unprom->caster)
747 tree lhs = gimple_get_lhs (unprom->caster->stmt);
748 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
749 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
750 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
751 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
752 return new_op;
755 /* If OP is an external value, see if we can insert the new statement
756 on an incoming edge. */
757 if (unprom->dt == vect_external_def)
758 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
760 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
761 gcc_assert (!new_bb);
762 return new_op;
765 /* As a (common) last resort, add the statement to the pattern itself. */
766 append_pattern_def_seq (stmt_info, new_stmt, vectype);
767 return new_op;
770 /* Invoke vect_convert_input for N elements of UNPROM and store the
771 result in the corresponding elements of RESULT. */
773 static void
774 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
775 tree *result, tree type, vect_unpromoted_value *unprom,
776 tree vectype)
778 for (unsigned int i = 0; i < n; ++i)
780 unsigned int j;
781 for (j = 0; j < i; ++j)
782 if (unprom[j].op == unprom[i].op)
783 break;
784 if (j < i)
785 result[i] = result[j];
786 else
787 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
791 /* The caller has created a (possibly empty) sequence of pattern definition
792 statements followed by a single statement PATTERN_STMT. Cast the result
793 of this final statement to TYPE. If a new statement is needed, add
794 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
795 and return the new statement, otherwise return PATTERN_STMT as-is.
796 VECITYPE is the vector form of PATTERN_STMT's result type. */
798 static gimple *
799 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
800 tree vecitype)
802 tree lhs = gimple_get_lhs (pattern_stmt);
803 if (!types_compatible_p (type, TREE_TYPE (lhs)))
805 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
806 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
807 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
809 return pattern_stmt;
812 /* Return true if STMT_VINFO describes a reduction for which reassociation
813 is allowed. If STMT_INFO is part of a group, assume that it's part of
814 a reduction chain and optimistically assume that all statements
815 except the last allow reassociation. */
817 static bool
818 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
820 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
821 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
822 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL_STMT_VEC_INFO);
825 /* As above, but also require it to have code CODE and to be a reduction
826 in the outermost loop. When returning true, store the operands in
827 *OP0_OUT and *OP1_OUT. */
829 static bool
830 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
831 tree *op0_out, tree *op1_out)
833 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
834 if (!loop_info)
835 return false;
837 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
838 if (!assign || gimple_assign_rhs_code (assign) != code)
839 return false;
841 /* We don't allow changing the order of the computation in the inner-loop
842 when doing outer-loop vectorization. */
843 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
844 if (loop && nested_in_vect_loop_p (loop, stmt_info))
845 return false;
847 if (!vect_reassociating_reduction_p (stmt_info))
848 return false;
850 *op0_out = gimple_assign_rhs1 (assign);
851 *op1_out = gimple_assign_rhs2 (assign);
852 return true;
855 /* Function vect_recog_dot_prod_pattern
857 Try to find the following pattern:
859 type x_t, y_t;
860 TYPE1 prod;
861 TYPE2 sum = init;
862 loop:
863 sum_0 = phi <init, sum_1>
864 S1 x_t = ...
865 S2 y_t = ...
866 S3 x_T = (TYPE1) x_t;
867 S4 y_T = (TYPE1) y_t;
868 S5 prod = x_T * y_T;
869 [S6 prod = (TYPE2) prod; #optional]
870 S7 sum_1 = prod + sum_0;
872 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
873 same size as 'TYPE1' or bigger. This is a special case of a reduction
874 computation.
876 Input:
878 * STMT_VINFO: The stmt from which the pattern search begins. In the
879 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
880 will be detected.
882 Output:
884 * TYPE_OUT: The type of the output of this pattern.
886 * Return value: A new stmt that will be used to replace the sequence of
887 stmts that constitute the pattern. In this case it will be:
888 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
890 Note: The dot-prod idiom is a widening reduction pattern that is
891 vectorized without preserving all the intermediate results. It
892 produces only N/2 (widened) results (by summing up pairs of
893 intermediate results) rather than all N results. Therefore, we
894 cannot allow this pattern when we want to get all the results and in
895 the correct order (as is the case when this computation is in an
896 inner-loop nested in an outer-loop that is being vectorized). */
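/* Illustration, not part of the original source: a typical loop this
   recognizer is intended to match (assuming the target provides a
   dot-product instruction for these modes, e.g. Arm's sdot/udot):

     signed char *x, *y;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += x[i] * y[i];

   After the usual C promotions the multiply and add are performed in int;
   vect_widened_op_tree identifies signed char as the common half type, so
   the whole sequence can be replaced by DOT_PROD_EXPR <x_t, y_t, sum_0>
   when vect_supportable_direct_optab_p confirms target support.  */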
898 static gimple *
899 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
901 tree oprnd0, oprnd1;
902 gimple *last_stmt = stmt_vinfo->stmt;
903 vec_info *vinfo = stmt_vinfo->vinfo;
904 tree type, half_type;
905 gimple *pattern_stmt;
906 tree var;
908 /* Look for the following pattern
909 DX = (TYPE1) X;
910 DY = (TYPE1) Y;
911 DPROD = DX * DY;
912 DDPROD = (TYPE2) DPROD;
913 sum_1 = DDPROD + sum_0;
914 In which
915 - DX is double the size of X
916 - DY is double the size of Y
917 - DX, DY, DPROD all have the same type
918 - sum is the same size as DPROD or bigger
919 - sum has been recognized as a reduction variable.
921 This is equivalent to:
922 DPROD = X w* Y; #widen mult
923 sum_1 = DPROD w+ sum_0; #widen summation
925 DPROD = X w* Y; #widen mult
926 sum_1 = DPROD + sum_0; #summation */
929 /* Starting from LAST_STMT, follow the defs of its uses in search
930 of the above pattern. */
932 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
933 &oprnd0, &oprnd1))
934 return NULL;
936 type = gimple_expr_type (last_stmt);
938 vect_unpromoted_value unprom_mult;
939 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
941 /* So far so good. Since last_stmt was detected as a (summation) reduction,
942 we know that oprnd1 is the reduction variable (defined by a loop-header
943 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
944 Left to check that oprnd0 is defined by a (widen_)mult_expr */
945 if (!oprnd0)
946 return NULL;
948 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
949 if (!mult_vinfo)
950 return NULL;
952 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
953 inside the loop (in case we are analyzing an outer-loop). */
954 vect_unpromoted_value unprom0[2];
955 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
956 false, 2, unprom0, &half_type))
957 return NULL;
959 /* If there are two widening operations, make sure they agree on
960 the sign of the extension. */
961 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
962 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
963 return NULL;
965 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
967 tree half_vectype;
968 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
969 type_out, &half_vectype))
970 return NULL;
972 /* Get the inputs in the appropriate types. */
973 tree mult_oprnd[2];
974 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
975 unprom0, half_vectype);
977 var = vect_recog_temp_ssa_var (type, NULL);
978 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
979 mult_oprnd[0], mult_oprnd[1], oprnd1);
981 return pattern_stmt;
985 /* Function vect_recog_sad_pattern
987 Try to find the following Sum of Absolute Difference (SAD) pattern:
989 type x_t, y_t;
990 signed TYPE1 diff, abs_diff;
991 TYPE2 sum = init;
992 loop:
993 sum_0 = phi <init, sum_1>
994 S1 x_t = ...
995 S2 y_t = ...
996 S3 x_T = (TYPE1) x_t;
997 S4 y_T = (TYPE1) y_t;
998 S5 diff = x_T - y_T;
999 S6 abs_diff = ABS_EXPR <diff>;
1000 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1001 S8 sum_1 = abs_diff + sum_0;
1003 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1004 same size as 'TYPE1' or bigger. This is a special case of a reduction
1005 computation.
1007 Input:
1009 * STMT_VINFO: The stmt from which the pattern search begins. In the
1010 example, when this function is called with S8, the pattern
1011 {S3,S4,S5,S6,S7,S8} will be detected.
1013 Output:
1015 * TYPE_OUT: The type of the output of this pattern.
1017 * Return value: A new stmt that will be used to replace the sequence of
1018 stmts that constitute the pattern. In this case it will be:
1019 SAD_EXPR <x_t, y_t, sum_0> */
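/* Illustration, not part of the original source: the classic SAD kernel
   this recognizer is intended to catch (assuming the target provides a
   sum-of-absolute-differences instruction, e.g. x86 psadbw):

     unsigned char *x, *y;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += __builtin_abs (x[i] - y[i]);

   The subtraction and absolute value are performed in int after promotion
   and accumulated into the int sum, matching S1-S8 above.  */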
1022 static gimple *
1023 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1025 gimple *last_stmt = stmt_vinfo->stmt;
1026 vec_info *vinfo = stmt_vinfo->vinfo;
1027 tree half_type;
1029 /* Look for the following pattern
1030 DX = (TYPE1) X;
1031 DY = (TYPE1) Y;
1032 DDIFF = DX - DY;
1033 DAD = ABS_EXPR <DDIFF>;
1034 [DAD = (TYPE2) DAD; #optional]
1035 sum_1 = DAD + sum_0;
1036 In which
1037 - DX is at least double the size of X
1038 - DY is at least double the size of Y
1039 - DX, DY, DDIFF, DAD all have the same type
1040 - sum is the same size as DAD or bigger
1041 - sum has been recognized as a reduction variable.
1043 This is equivalent to:
1044 DDIFF = X w- Y; #widen sub
1045 DAD = ABS_EXPR <DDIFF>;
1046 sum_1 = DAD w+ sum_0; #widen summation
1048 DDIFF = X w- Y; #widen sub
1049 DAD = ABS_EXPR <DDIFF>;
1050 sum_1 = DAD + sum_0; #summation */
1053 /* Starting from LAST_STMT, follow the defs of its uses in search
1054 of the above pattern. */
1056 tree plus_oprnd0, plus_oprnd1;
1057 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1058 &plus_oprnd0, &plus_oprnd1))
1059 return NULL;
1061 tree sum_type = gimple_expr_type (last_stmt);
1063 /* Any non-truncating sequence of conversions is OK here, since
1064 with a successful match, the result of the ABS(U) is known to fit
1065 within the nonnegative range of the result type. (It cannot be the
1066 negative of the minimum signed value due to the range of the widening
1067 MINUS_EXPR.) */
1068 vect_unpromoted_value unprom_abs;
1069 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1070 &unprom_abs);
1072 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1073 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1074 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1075 Then check that plus_oprnd0 is defined by an abs_expr. */
1077 if (!plus_oprnd0)
1078 return NULL;
1080 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1081 if (!abs_stmt_vinfo)
1082 return NULL;
1084 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1085 inside the loop (in case we are analyzing an outer-loop). */
1086 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1087 if (!abs_stmt
1088 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1089 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1090 return NULL;
1092 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1093 tree abs_type = TREE_TYPE (abs_oprnd);
1094 if (TYPE_UNSIGNED (abs_type))
1095 return NULL;
1097 /* Peel off conversions from the ABS input. This can involve sign
1098 changes (e.g. from an unsigned subtraction to a signed ABS input)
1099 or signed promotion, but it can't include unsigned promotion.
1100 (Note that ABS of an unsigned promotion should have been folded
1101 away before now anyway.) */
1102 vect_unpromoted_value unprom_diff;
1103 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1104 &unprom_diff);
1105 if (!abs_oprnd)
1106 return NULL;
1107 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1108 && TYPE_UNSIGNED (unprom_diff.type))
1109 return NULL;
1111 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1112 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1113 if (!diff_stmt_vinfo)
1114 return NULL;
1116 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1117 inside the loop (in case we are analyzing an outer-loop). */
1118 vect_unpromoted_value unprom[2];
1119 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1120 false, 2, unprom, &half_type))
1121 return NULL;
1123 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1125 tree half_vectype;
1126 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1127 type_out, &half_vectype))
1128 return NULL;
1130 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1131 tree sad_oprnd[2];
1132 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1133 unprom, half_vectype);
1135 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1136 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1137 sad_oprnd[1], plus_oprnd1);
1139 return pattern_stmt;
1142 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1143 so that it can be treated as though it had the form:
1145 A_TYPE a;
1146 B_TYPE b;
1147 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1148 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1149 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1150 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1151 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1153 Try to replace the pattern with:
1155 A_TYPE a;
1156 B_TYPE b;
1157 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1158 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1159 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1160 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1162 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1164 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1165 name of the pattern being matched, for dump purposes. */
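/* Illustration, not part of the original source: for the WIDEN_MULT case
   (vect_recog_widen_mult_pattern below), a matching source fragment is

     unsigned short a, b;
     unsigned int res = (unsigned int) a * (unsigned int) b;

   Instead of unpacking both inputs to 32 bits and doing a full-width
   multiply, the pattern lets the vectorizer emit WIDEN_MULT_EXPR directly
   on the 16-bit inputs, producing the 32-bit result in one step.  */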
1167 static gimple *
1168 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1169 tree_code orig_code, tree_code wide_code,
1170 bool shift_p, const char *name)
1172 gimple *last_stmt = last_stmt_info->stmt;
1174 vect_unpromoted_value unprom[2];
1175 tree half_type;
1176 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1177 shift_p, 2, unprom, &half_type))
1178 return NULL;
1180 /* Pattern detected. */
1181 vect_pattern_detected (name, last_stmt);
1183 tree type = gimple_expr_type (last_stmt);
1184 tree itype = type;
1185 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1186 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1187 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1188 TYPE_UNSIGNED (half_type));
1190 /* Check target support */
1191 tree vectype = get_vectype_for_scalar_type (half_type);
1192 tree vecitype = get_vectype_for_scalar_type (itype);
1193 enum tree_code dummy_code;
1194 int dummy_int;
1195 auto_vec<tree> dummy_vec;
1196 if (!vectype
1197 || !vecitype
1198 || !supportable_widening_operation (wide_code, last_stmt_info,
1199 vecitype, vectype,
1200 &dummy_code, &dummy_code,
1201 &dummy_int, &dummy_vec))
1202 return NULL;
1204 *type_out = get_vectype_for_scalar_type (type);
1205 if (!*type_out)
1206 return NULL;
1208 tree oprnd[2];
1209 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1211 tree var = vect_recog_temp_ssa_var (itype, NULL);
1212 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1213 oprnd[0], oprnd[1]);
1215 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1218 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1219 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1221 static gimple *
1222 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1224 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1225 WIDEN_MULT_EXPR, false,
1226 "vect_recog_widen_mult_pattern");
1229 /* Function vect_recog_pow_pattern
1231 Try to find the following pattern:
1233 x = POW (y, N);
1235 with POW being one of pow, powf, powi, powif and N being
1236 either 2 or 0.5.
1238 Input:
1240 * STMT_VINFO: The stmt from which the pattern search begins.
1242 Output:
1244 * TYPE_OUT: The type of the output of this pattern.
1246 * Return value: A new stmt that will be used to replace the sequence of
1247 stmts that constitute the pattern. In this case it will be:
1248 x = x * x
1250 x = sqrt (x) */
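/* Illustration, not part of the original source: the two forms handled
   below are

     y = pow (x, 2.0);   rewritten as   y = x * x;
     y = pow (x, 0.5);   rewritten as   y = sqrt (x);

   the latter only when the target supports a vectorized IFN_SQRT.  */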
1253 static gimple *
1254 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1256 gimple *last_stmt = stmt_vinfo->stmt;
1257 tree base, exp;
1258 gimple *stmt;
1259 tree var;
1261 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1262 return NULL;
1264 switch (gimple_call_combined_fn (last_stmt))
1266 CASE_CFN_POW:
1267 CASE_CFN_POWI:
1268 break;
1270 default:
1271 return NULL;
1274 base = gimple_call_arg (last_stmt, 0);
1275 exp = gimple_call_arg (last_stmt, 1);
1276 if (TREE_CODE (exp) != REAL_CST
1277 && TREE_CODE (exp) != INTEGER_CST)
1279 if (flag_unsafe_math_optimizations
1280 && TREE_CODE (base) == REAL_CST
1281 && !gimple_call_internal_p (last_stmt))
1283 combined_fn log_cfn;
1284 built_in_function exp_bfn;
1285 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1287 case BUILT_IN_POW:
1288 log_cfn = CFN_BUILT_IN_LOG;
1289 exp_bfn = BUILT_IN_EXP;
1290 break;
1291 case BUILT_IN_POWF:
1292 log_cfn = CFN_BUILT_IN_LOGF;
1293 exp_bfn = BUILT_IN_EXPF;
1294 break;
1295 case BUILT_IN_POWL:
1296 log_cfn = CFN_BUILT_IN_LOGL;
1297 exp_bfn = BUILT_IN_EXPL;
1298 break;
1299 default:
1300 return NULL;
1302 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1303 tree exp_decl = builtin_decl_implicit (exp_bfn);
1304 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1305 does that, but if C is a power of 2, we want to use
1306 exp2 (log2 (C) * x) in the non-vectorized version, but for
1307 vectorization we don't have vectorized exp2. */
1308 if (logc
1309 && TREE_CODE (logc) == REAL_CST
1310 && exp_decl
1311 && lookup_attribute ("omp declare simd",
1312 DECL_ATTRIBUTES (exp_decl)))
1314 cgraph_node *node = cgraph_node::get_create (exp_decl);
1315 if (node->simd_clones == NULL)
1317 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1318 || node->definition)
1319 return NULL;
1320 expand_simd_clones (node);
1321 if (node->simd_clones == NULL)
1322 return NULL;
1324 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1325 if (!*type_out)
1326 return NULL;
1327 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1328 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1329 append_pattern_def_seq (stmt_vinfo, g);
1330 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1331 g = gimple_build_call (exp_decl, 1, def);
1332 gimple_call_set_lhs (g, res);
1333 return g;
1337 return NULL;
1340 /* We now have a pow or powi builtin function call with a constant
1341 exponent. */
1343 /* Catch squaring. */
1344 if ((tree_fits_shwi_p (exp)
1345 && tree_to_shwi (exp) == 2)
1346 || (TREE_CODE (exp) == REAL_CST
1347 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1349 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1350 TREE_TYPE (base), type_out))
1351 return NULL;
1353 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1354 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1355 return stmt;
1358 /* Catch square root. */
1359 if (TREE_CODE (exp) == REAL_CST
1360 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1362 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1363 if (*type_out
1364 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1365 OPTIMIZE_FOR_SPEED))
1367 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1368 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1369 gimple_call_set_lhs (stmt, var);
1370 gimple_call_set_nothrow (stmt, true);
1371 return stmt;
1375 return NULL;
1379 /* Function vect_recog_widen_sum_pattern
1381 Try to find the following pattern:
1383 type x_t;
1384 TYPE x_T, sum = init;
1385 loop:
1386 sum_0 = phi <init, sum_1>
1387 S1 x_t = *p;
1388 S2 x_T = (TYPE) x_t;
1389 S3 sum_1 = x_T + sum_0;
1391 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1392 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1393 a special case of a reduction computation.
1395 Input:
1397 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1398 when this function is called with S3, the pattern {S2,S3} will be detected.
1400 Output:
1402 * TYPE_OUT: The type of the output of this pattern.
1404 * Return value: A new stmt that will be used to replace the sequence of
1405 stmts that constitute the pattern. In this case it will be:
1406 WIDEN_SUM <x_t, sum_0>
1408 Note: The widening-sum idiom is a widening reduction pattern that is
1409 vectorized without preserving all the intermediate results. It
1410 produces only N/2 (widened) results (by summing up pairs of
1411 intermediate results) rather than all N results. Therefore, we
1412 cannot allow this pattern when we want to get all the results and in
1413 the correct order (as is the case when this computation is in an
1414 inner-loop nested in an outer-loop that is being vectorized). */
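/* Illustration, not part of the original source: a matching loop is

     unsigned char *x;
     unsigned int sum = 0;
     for (int i = 0; i < n; i++)
       sum += x[i];

   On a target that provides a vector widening-sum operation this becomes
   WIDEN_SUM_EXPR <x_t, sum_0> instead of unpacking each element to
   32 bits before the addition.  */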
1416 static gimple *
1417 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1419 gimple *last_stmt = stmt_vinfo->stmt;
1420 tree oprnd0, oprnd1;
1421 vec_info *vinfo = stmt_vinfo->vinfo;
1422 tree type;
1423 gimple *pattern_stmt;
1424 tree var;
1426 /* Look for the following pattern
1427 DX = (TYPE) X;
1428 sum_1 = DX + sum_0;
1429 In which DX is at least double the size of X, and sum_1 has been
1430 recognized as a reduction variable. */
1433 /* Starting from LAST_STMT, follow the defs of its uses in search
1434 of the above pattern. */
1436 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1437 &oprnd0, &oprnd1))
1438 return NULL;
1440 type = gimple_expr_type (last_stmt);
1442 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1443 we know that oprnd1 is the reduction variable (defined by a loop-header
1444 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1445 Left to check that oprnd0 is defined by a cast from type 'type' to type
1446 'TYPE'. */
1448 vect_unpromoted_value unprom0;
1449 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1450 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1451 return NULL;
1453 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1455 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1456 type_out))
1457 return NULL;
1459 var = vect_recog_temp_ssa_var (type, NULL);
1460 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1462 return pattern_stmt;
1465 /* Recognize cases in which an operation is performed in one type WTYPE
1466 but could be done more efficiently in a narrower type NTYPE. For example,
1467 if we have:
1469 ATYPE a; // narrower than NTYPE
1470 BTYPE b; // narrower than NTYPE
1471 WTYPE aw = (WTYPE) a;
1472 WTYPE bw = (WTYPE) b;
1473 WTYPE res = aw + bw; // only uses of aw and bw
1475 then it would be more efficient to do:
1477 NTYPE an = (NTYPE) a;
1478 NTYPE bn = (NTYPE) b;
1479 NTYPE resn = an + bn;
1480 WTYPE res = (WTYPE) resn;
1482 Other situations include things like:
1484 ATYPE a; // NTYPE or narrower
1485 WTYPE aw = (WTYPE) a;
1486 WTYPE res = aw + b;
1488 when only "(NTYPE) res" is significant. In that case it's more efficient
1489 to truncate "b" and do the operation on NTYPE instead:
1491 NTYPE an = (NTYPE) a;
1492 NTYPE bn = (NTYPE) b; // truncation
1493 NTYPE resn = an + bn;
1494 WTYPE res = (WTYPE) resn;
1496 All users of "res" should then use "resn" instead, making the final
1497 statement dead (not marked as relevant). The final statement is still
1498 needed to maintain the type correctness of the IR.
1500 vect_determine_precisions has already determined the minimum
1501 precision of the operation and the minimum precision required
1502 by users of the result. */
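/* Illustration, not part of the original source: in

     unsigned char *a, *b, *c;
     for (int i = 0; i < n; i++)
       c[i] = a[i] + b[i];

   C's integer promotions perform the addition in 32-bit int, but only the
   low 8 bits of the result are stored, so the pattern below can perform
   the addition directly on narrow (here unsigned char) elements and let
   the final widening cast become dead.  */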
1504 static gimple *
1505 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1507 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1508 if (!last_stmt)
1509 return NULL;
1511 /* See whether we have found that this operation can be done on a
1512 narrower type without changing its semantics. */
1513 unsigned int new_precision = last_stmt_info->operation_precision;
1514 if (!new_precision)
1515 return NULL;
1517 vec_info *vinfo = last_stmt_info->vinfo;
1518 tree lhs = gimple_assign_lhs (last_stmt);
1519 tree type = TREE_TYPE (lhs);
1520 tree_code code = gimple_assign_rhs_code (last_stmt);
1522 /* Keep the first operand of a COND_EXPR as-is: only the other two
1523 operands are interesting. */
1524 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1526 /* Check the operands. */
1527 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1528 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1529 unprom.quick_grow (nops);
1530 unsigned int min_precision = 0;
1531 bool single_use_p = false;
1532 for (unsigned int i = 0; i < nops; ++i)
1534 tree op = gimple_op (last_stmt, first_op + i);
1535 if (TREE_CODE (op) == INTEGER_CST)
1536 unprom[i].set_op (op, vect_constant_def);
1537 else if (TREE_CODE (op) == SSA_NAME)
1539 bool op_single_use_p = true;
1540 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1541 &op_single_use_p))
1542 return NULL;
1543 /* If:
1545 (1) N bits of the result are needed;
1546 (2) all inputs are widened from M<N bits; and
1547 (3) one operand OP is a single-use SSA name
1549 we can shift the M->N widening from OP to the output
1550 without changing the number or type of extensions involved.
1551 This then reduces the number of copies of STMT_INFO.
1553 If instead of (3) more than one operand is a single-use SSA name,
1554 shifting the extension to the output is even more of a win.
1556 If instead:
1558 (1) N bits of the result are needed;
1559 (2) one operand OP2 is widened from M2<N bits;
1560 (3) another operand OP1 is widened from M1<M2 bits; and
1561 (4) both OP1 and OP2 are single-use
1563 the choice is between:
1565 (a) truncating OP2 to M1, doing the operation on M1,
1566 and then widening the result to N
1568 (b) widening OP1 to M2, doing the operation on M2, and then
1569 widening the result to N
1571 Both shift the M2->N widening of the inputs to the output.
1572 (a) additionally shifts the M1->M2 widening to the output;
1573 it requires fewer copies of STMT_INFO but requires an extra
1574 M2->M1 truncation.
1576 Which is better will depend on the complexity and cost of
1577 STMT_INFO, which is hard to predict at this stage. However,
1578 a clear tie-breaker in favor of (b) is the fact that the
1579 truncation in (a) increases the length of the operation chain.
1581 If instead of (4) only one of OP1 or OP2 is single-use,
1582 (b) is still a win over doing the operation in N bits:
1583 it still shifts the M2->N widening on the single-use operand
1584 to the output and reduces the number of STMT_INFO copies.
1586 If neither operand is single-use then operating on fewer than
1587 N bits might lead to more extensions overall. Whether it does
1588 or not depends on global information about the vectorization
1589 region, and whether that's a good trade-off would again
1590 depend on the complexity and cost of the statements involved,
1591 as well as things like register pressure that are not normally
1592 modelled at this stage. We therefore ignore these cases
1593 and just optimize the clear single-use wins above.
1595 Thus we take the maximum precision of the unpromoted operands
1596 and record whether any operand is single-use. */
1597 if (unprom[i].dt == vect_internal_def)
1599 min_precision = MAX (min_precision,
1600 TYPE_PRECISION (unprom[i].type));
1601 single_use_p |= op_single_use_p;
1606 /* Although the operation could be done in operation_precision, we have
1607 to balance that against introducing extra truncations or extensions.
1608 Calculate the minimum precision that can be handled efficiently.
1610 The loop above determined that the operation could be handled
1611 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1612 extension from the inputs to the output without introducing more
1613 instructions, and would reduce the number of instructions required
1614 for STMT_INFO itself.
1616 vect_determine_precisions has also determined that the result only
1617 needs min_output_precision bits. Truncating by a factor of N times
1618 requires a tree of N - 1 instructions, so if TYPE is N times wider
1619 than min_output_precision, doing the operation in TYPE and truncating
1620 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1621 In contrast:
1623 - truncating the input to a unary operation and doing the operation
1624 in the new type requires at most N - 1 + 1 = N instructions per
1625 output vector
1627 - doing the same for a binary operation requires at most
1628 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1630 Both unary and binary operations require fewer instructions than
1631 this if the operands were extended from a suitable truncated form.
1632 Thus there is usually nothing to lose by doing operations in
1633 min_output_precision bits, but there can be something to gain. */
1634 if (!single_use_p)
1635 min_precision = last_stmt_info->min_output_precision;
1636 else
1637 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1639 /* Apply the minimum efficient precision we just calculated. */
1640 if (new_precision < min_precision)
1641 new_precision = min_precision;
1642 if (new_precision >= TYPE_PRECISION (type))
1643 return NULL;
1645 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1647 *type_out = get_vectype_for_scalar_type (type);
1648 if (!*type_out)
1649 return NULL;
1651 /* We've found a viable pattern. Get the new type of the operation. */
1652 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1653 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1655 /* We specifically don't check here whether the target supports the
1656 new operation, since it might be something that a later pattern
1657 wants to rewrite anyway. If targets have a minimum element size
1658 for some optabs, we should pattern-match smaller ops to larger ops
1659 where beneficial. */
1660 tree new_vectype = get_vectype_for_scalar_type (new_type);
1661 if (!new_vectype)
1662 return NULL;
1664 if (dump_enabled_p ())
1666 dump_printf_loc (MSG_NOTE, vect_location, "demoting ");
1667 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
1668 dump_printf (MSG_NOTE, " to ");
1669 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_type);
1670 dump_printf (MSG_NOTE, "\n");
1673 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1674 tree ops[3] = {};
1675 for (unsigned int i = 1; i < first_op; ++i)
1676 ops[i - 1] = gimple_op (last_stmt, i);
1677 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1678 new_type, &unprom[0], new_vectype);
1680 /* Use the operation to produce a result of type NEW_TYPE. */
1681 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1682 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1683 ops[0], ops[1], ops[2]);
1684 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1686 if (dump_enabled_p ())
1688 dump_printf_loc (MSG_NOTE, vect_location,
1689 "created pattern stmt: ");
1690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1693 pattern_stmt = vect_convert_output (last_stmt_info, type,
1694 pattern_stmt, new_vectype);
1696 return pattern_stmt;
1699 /* Recognize the patterns:
1701 ATYPE a; // narrower than TYPE
1702 BTYPE b; // narrower than TYPE
1703 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1704 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1706 where only the bottom half of avg is used. Try to transform them into:
1708 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1709 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1711 followed by:
1713 TYPE avg = (TYPE) avg';
1715 where NTYPE is no wider than half of TYPE. Since only the bottom half
1716 of avg is used, all or part of the cast of avg' should become redundant. */
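/* As an illustration (types chosen only for the example), with unsigned
   char inputs a and b and only the low 8 bits of avg used:

     int avg = ((int) a + (int) b + 1) >> 1;

   becomes

     unsigned char avg' = .AVG_CEIL (a, b);
     int avg = (int) avg';  */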
1718 static gimple *
1719 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1721 /* Check for a shift right by one bit. */
1722 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1723 vec_info *vinfo = last_stmt_info->vinfo;
1724 if (!last_stmt
1725 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1726 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1727 return NULL;
1729 /* Check that the shift result is wider than the users of the
1730 result need (i.e. that narrowing would be a natural choice). */
1731 tree lhs = gimple_assign_lhs (last_stmt);
1732 tree type = TREE_TYPE (lhs);
1733 unsigned int target_precision
1734 = vect_element_precision (last_stmt_info->min_output_precision);
1735 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1736 return NULL;
1738 /* Get the definition of the shift input. */
1739 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1740 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1741 if (!plus_stmt_info)
1742 return NULL;
1744 /* Check whether the shift input can be seen as a tree of additions on
1745 2 or 3 widened inputs.
1747 Note that the pattern should be a win even if the result of one or
1748 more additions is reused elsewhere: if the pattern matches, we'd be
1749 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1750 internal_fn ifn = IFN_AVG_FLOOR;
1751 vect_unpromoted_value unprom[3];
1752 tree new_type;
1753 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1754 PLUS_EXPR, false, 3,
1755 unprom, &new_type);
1756 if (nops == 0)
1757 return NULL;
1758 if (nops == 3)
1760 /* Check that one operand is 1. */
1761 unsigned int i;
1762 for (i = 0; i < 3; ++i)
1763 if (integer_onep (unprom[i].op))
1764 break;
1765 if (i == 3)
1766 return NULL;
1767 /* Throw away the 1 operand and keep the other two. */
1768 if (i < 2)
1769 unprom[i] = unprom[2];
1770 ifn = IFN_AVG_CEIL;
1773 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1775 /* We know that:
1777 (a) the operation can be viewed as:
1779 TYPE widened0 = (TYPE) UNPROM[0];
1780 TYPE widened1 = (TYPE) UNPROM[1];
1781 TYPE tmp1 = widened0 + widened1 {+ 1};
1782 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1784 (b) the first two statements are equivalent to:
1786 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1787 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1789 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1790 where sensible;
1792 (d) all the operations can be performed correctly at twice the width of
1793 NEW_TYPE, due to the nature of the average operation; and
1795 (e) users of the result of the right shift need only TARGET_PRECISION
1796 bits, where TARGET_PRECISION is no more than half of TYPE's
1797 precision.
1799 Under these circumstances, the only situation in which NEW_TYPE
1800 could be narrower than TARGET_PRECISION is if widened0, widened1
1801 and an addition result are all used more than once. Thus we can
1802 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1803 as "free", whereas widening the result of the average instruction
1804 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1805 therefore better not to go narrower than TARGET_PRECISION. */
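  /* For instance, if UNPROM[0] and UNPROM[1] are unsigned chars (so
     NEW_TYPE has 8 bits) but the users of the shift result need 16 bits,
     it is better to perform a 16-bit .AVG_* on widened inputs than an
     8-bit .AVG_* followed by a separate widening of its result.  */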
1806 if (TYPE_PRECISION (new_type) < target_precision)
1807 new_type = build_nonstandard_integer_type (target_precision,
1808 TYPE_UNSIGNED (new_type));
1810 /* Check for target support. */
1811 tree new_vectype = get_vectype_for_scalar_type (new_type);
1812 if (!new_vectype
1813 || !direct_internal_fn_supported_p (ifn, new_vectype,
1814 OPTIMIZE_FOR_SPEED))
1815 return NULL;
1817 /* The IR requires a valid vector type for the cast result, even though
1818 it's likely to be discarded. */
1819 *type_out = get_vectype_for_scalar_type (type);
1820 if (!*type_out)
1821 return NULL;
1823 /* Generate the IFN_AVG* call. */
1824 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1825 tree new_ops[2];
1826 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1827 unprom, new_vectype);
1828 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1829 new_ops[1]);
1830 gimple_call_set_lhs (average_stmt, new_var);
1831 gimple_set_location (average_stmt, gimple_location (last_stmt));
1833 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_NOTE, vect_location,
1836 "created pattern stmt: ");
1837 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
1840 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1843 /* Recognize cases in which the input to a cast is wider than its
1844 output, and the input is fed by a widening operation. Fold this
1845 by removing the unnecessary intermediate widening. E.g.:
1847 unsigned char a;
1848 unsigned int b = (unsigned int) a;
1849 unsigned short c = (unsigned short) b;
1851 -->
1853 unsigned short c = (unsigned short) a;
1855 Although this is rare in input IR, it is an expected side-effect
1856 of the over-widening pattern above.
1858 This is beneficial also for integer-to-float conversions, if the
1859 widened integer has more bits than the float, and if the unwidened
1860 input doesn't. */
1862 static gimple *
1863 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1865 /* Check for a cast, including an integer-to-float conversion. */
1866 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1867 if (!last_stmt)
1868 return NULL;
1869 tree_code code = gimple_assign_rhs_code (last_stmt);
1870 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1871 return NULL;
1873 /* Make sure that the cast result is a scalar with a natural bitsize. */
1874 tree lhs = gimple_assign_lhs (last_stmt);
1875 if (!lhs)
1876 return NULL;
1877 tree lhs_type = TREE_TYPE (lhs);
1878 scalar_mode lhs_mode;
1879 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1880 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1881 return NULL;
1883 /* Check for a narrowing operation (from a vector point of view). */
1884 tree rhs = gimple_assign_rhs1 (last_stmt);
1885 tree rhs_type = TREE_TYPE (rhs);
1886 if (!INTEGRAL_TYPE_P (rhs_type)
1887 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1888 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1889 return NULL;
1891 /* Try to find an unpromoted input. */
1892 vec_info *vinfo = last_stmt_info->vinfo;
1893 vect_unpromoted_value unprom;
1894 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1895 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1896 return NULL;
1898 /* If the bits above RHS_TYPE matter, make sure that they're the
1899 same when extending from UNPROM as they are when extending from RHS. */
1900 if (!INTEGRAL_TYPE_P (lhs_type)
1901 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1902 return NULL;
1904 /* We can get the same result by casting UNPROM directly, to avoid
1905 the unnecessary widening and narrowing. */
1906 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1908 *type_out = get_vectype_for_scalar_type (lhs_type);
1909 if (!*type_out)
1910 return NULL;
1912 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1913 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1914 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1916 return pattern_stmt;
1919 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1920 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1922 static gimple *
1923 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1925 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1926 WIDEN_LSHIFT_EXPR, true,
1927 "vect_recog_widen_shift_pattern");
1930 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1932 type a_t, b_t, c_t;
1934 S0 a_t = b_t r<< c_t;
1936 Input/Output:
1938 * STMT_VINFO: The stmt from which the pattern search begins,
1939 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1940 with a sequence:
1942 S1 d_t = -c_t;
1943 S2 e_t = d_t & (B - 1);
1944 S3 f_t = b_t << c_t;
1945 S4 g_t = b_t >> e_t;
1946 S0 a_t = f_t | g_t;
1948 where B is element bitsize of type.
1950 Output:
1952 * TYPE_OUT: The type of the output of this pattern.
1954 * Return value: A new stmt that will be used to replace the rotate
1955 S0 stmt. */
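/* For example, for a 32-bit unsigned b_t (B == 32) the rotate

     a_t = b_t r<< c_t;

   is replaced by

     d_t = -c_t;
     e_t = d_t & 31;
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;  */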
1957 static gimple *
1958 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1960 gimple *last_stmt = stmt_vinfo->stmt;
1961 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1962 gimple *pattern_stmt, *def_stmt;
1963 enum tree_code rhs_code;
1964 vec_info *vinfo = stmt_vinfo->vinfo;
1965 enum vect_def_type dt;
1966 optab optab1, optab2;
1967 edge ext_def = NULL;
1969 if (!is_gimple_assign (last_stmt))
1970 return NULL;
1972 rhs_code = gimple_assign_rhs_code (last_stmt);
1973 switch (rhs_code)
1975 case LROTATE_EXPR:
1976 case RROTATE_EXPR:
1977 break;
1978 default:
1979 return NULL;
1982 lhs = gimple_assign_lhs (last_stmt);
1983 oprnd0 = gimple_assign_rhs1 (last_stmt);
1984 type = TREE_TYPE (oprnd0);
1985 oprnd1 = gimple_assign_rhs2 (last_stmt);
1986 if (TREE_CODE (oprnd0) != SSA_NAME
1987 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1988 || !INTEGRAL_TYPE_P (type)
1989 || !TYPE_UNSIGNED (type))
1990 return NULL;
1992 stmt_vec_info def_stmt_info;
1993 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
1994 return NULL;
1996 if (dt != vect_internal_def
1997 && dt != vect_constant_def
1998 && dt != vect_external_def)
1999 return NULL;
2001 vectype = get_vectype_for_scalar_type (type);
2002 if (vectype == NULL_TREE)
2003 return NULL;
2005 /* If vector/vector or vector/scalar rotate is supported by the target,
2006 don't do anything here. */
2007 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2008 if (optab1
2009 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2010 return NULL;
2012 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2014 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2015 if (optab2
2016 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2017 return NULL;
2020 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2021 don't do anything here either. */
2022 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2023 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2024 if (!optab1
2025 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2026 || !optab2
2027 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2029 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2030 return NULL;
2031 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2032 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2033 if (!optab1
2034 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2035 || !optab2
2036 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2037 return NULL;
2040 *type_out = vectype;
2042 if (dt == vect_external_def
2043 && TREE_CODE (oprnd1) == SSA_NAME)
2044 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2046 def = NULL_TREE;
2047 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2048 if (TREE_CODE (oprnd1) == INTEGER_CST
2049 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2050 def = oprnd1;
2051 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2053 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2054 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2055 && TYPE_PRECISION (TREE_TYPE (rhs1))
2056 == TYPE_PRECISION (type))
2057 def = rhs1;
2060 if (def == NULL_TREE)
2062 def = vect_recog_temp_ssa_var (type, NULL);
2063 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2064 if (ext_def)
2066 basic_block new_bb
2067 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2068 gcc_assert (!new_bb);
2070 else
2071 append_pattern_def_seq (stmt_vinfo, def_stmt);
2073 stype = TREE_TYPE (def);
2074 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2076 if (TREE_CODE (def) == INTEGER_CST)
2078 if (!tree_fits_uhwi_p (def)
2079 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2080 || integer_zerop (def))
2081 return NULL;
2082 def2 = build_int_cst (stype,
2083 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2085 else
2087 tree vecstype = get_vectype_for_scalar_type (stype);
2089 if (vecstype == NULL_TREE)
2090 return NULL;
2091 def2 = vect_recog_temp_ssa_var (stype, NULL);
2092 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2093 if (ext_def)
2095 basic_block new_bb
2096 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2097 gcc_assert (!new_bb);
2099 else
2100 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2102 def2 = vect_recog_temp_ssa_var (stype, NULL);
2103 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2104 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2105 gimple_assign_lhs (def_stmt), mask);
2106 if (ext_def)
2108 basic_block new_bb
2109 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2110 gcc_assert (!new_bb);
2112 else
2113 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2116 var1 = vect_recog_temp_ssa_var (type, NULL);
2117 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2118 ? LSHIFT_EXPR : RSHIFT_EXPR,
2119 oprnd0, def);
2120 append_pattern_def_seq (stmt_vinfo, def_stmt);
2122 var2 = vect_recog_temp_ssa_var (type, NULL);
2123 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2124 ? RSHIFT_EXPR : LSHIFT_EXPR,
2125 oprnd0, def2);
2126 append_pattern_def_seq (stmt_vinfo, def_stmt);
2128 /* Pattern detected. */
2129 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2131 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2132 var = vect_recog_temp_ssa_var (type, NULL);
2133 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2135 return pattern_stmt;
2138 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2139 vectorized:
2141 type a_t;
2142 TYPE b_T, res_T;
2144 S1 a_t = ;
2145 S2 b_T = ;
2146 S3 res_T = b_T op a_t;
2148 where type 'TYPE' is a type with a different size from 'type',
2149 and op is <<, >> or rotate.
2151 Also detect cases:
2153 type a_t;
2154 TYPE b_T, c_T, res_T;
2156 S0 c_T = ;
2157 S1 a_t = (type) c_T;
2158 S2 b_T = ;
2159 S3 res_T = b_T op a_t;
2161 Input/Output:
2163 * STMT_VINFO: The stmt from which the pattern search begins,
2164 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2165 with a shift/rotate which has same type on both operands, in the
2166 second case just b_T op c_T, in the first case with added cast
2167 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2169 Output:
2171 * TYPE_OUT: The type of the output of this pattern.
2173 * Return value: A new stmt that will be used to replace the shift/rotate
2174 S3 stmt. */
2176 static gimple *
2177 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2178 tree *type_out)
2180 gimple *last_stmt = stmt_vinfo->stmt;
2181 tree oprnd0, oprnd1, lhs, var;
2182 gimple *pattern_stmt;
2183 enum tree_code rhs_code;
2184 vec_info *vinfo = stmt_vinfo->vinfo;
2186 if (!is_gimple_assign (last_stmt))
2187 return NULL;
2189 rhs_code = gimple_assign_rhs_code (last_stmt);
2190 switch (rhs_code)
2192 case LSHIFT_EXPR:
2193 case RSHIFT_EXPR:
2194 case LROTATE_EXPR:
2195 case RROTATE_EXPR:
2196 break;
2197 default:
2198 return NULL;
2201 lhs = gimple_assign_lhs (last_stmt);
2202 oprnd0 = gimple_assign_rhs1 (last_stmt);
2203 oprnd1 = gimple_assign_rhs2 (last_stmt);
2204 if (TREE_CODE (oprnd0) != SSA_NAME
2205 || TREE_CODE (oprnd1) != SSA_NAME
2206 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2207 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2208 || TYPE_PRECISION (TREE_TYPE (lhs))
2209 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2210 return NULL;
2212 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2213 if (!def_vinfo)
2214 return NULL;
2216 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2217 if (*type_out == NULL_TREE)
2218 return NULL;
2220 tree def = NULL_TREE;
2221 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2222 if (def_stmt && gimple_assign_cast_p (def_stmt))
2224 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2225 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2226 && TYPE_PRECISION (TREE_TYPE (rhs1))
2227 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2229 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2230 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2231 def = rhs1;
2232 else
2234 tree mask
2235 = build_low_bits_mask (TREE_TYPE (rhs1),
2236 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2237 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2238 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2239 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2240 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2245 if (def == NULL_TREE)
2247 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2248 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2249 append_pattern_def_seq (stmt_vinfo, def_stmt);
2252 /* Pattern detected. */
2253 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2255 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2256 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2257 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2259 return pattern_stmt;
2262 /* Return true iff the target has a vector optab implementing the operation
2263 CODE on type VECTYPE. */
2265 static bool
2266 target_has_vecop_for_code (tree_code code, tree vectype)
2268 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2269 return voptab
2270 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2273 /* Verify that the target has optabs of VECTYPE to perform all the steps
2274 needed by the multiplication-by-immediate synthesis algorithm described by
2275 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2276 present. Return true iff the target supports all the steps. */
2278 static bool
2279 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2280 tree vectype, bool synth_shift_p)
2282 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2283 return false;
2285 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2286 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2288 if (var == negate_variant
2289 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2290 return false;
2292 /* If we must synthesize shifts with additions make sure that vector
2293 addition is available. */
2294 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2295 return false;
2297 for (int i = 1; i < alg->ops; i++)
2299 switch (alg->op[i])
2301 case alg_shift:
2302 break;
2303 case alg_add_t_m2:
2304 case alg_add_t2_m:
2305 case alg_add_factor:
2306 if (!supports_vplus)
2307 return false;
2308 break;
2309 case alg_sub_t_m2:
2310 case alg_sub_t2_m:
2311 case alg_sub_factor:
2312 if (!supports_vminus)
2313 return false;
2314 break;
2315 case alg_unknown:
2316 case alg_m:
2317 case alg_zero:
2318 case alg_impossible:
2319 return false;
2320 default:
2321 gcc_unreachable ();
2325 return true;
2328 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2329 putting the final result in DEST. Append all statements but the last into
2330 VINFO. Return the last statement. */
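/* For example, AMNT == 3 expands into the additions

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   i.e. op << 1, then op << 2, and finally op << 3.  */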
2332 static gimple *
2333 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2334 stmt_vec_info vinfo)
2336 HOST_WIDE_INT i;
2337 tree itype = TREE_TYPE (op);
2338 tree prev_res = op;
2339 gcc_assert (amnt >= 0);
2340 for (i = 0; i < amnt; i++)
2342 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2343 : dest;
2344 gimple *stmt
2345 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2346 prev_res = tmp_var;
2347 if (i < amnt - 1)
2348 append_pattern_def_seq (vinfo, stmt);
2349 else
2350 return stmt;
2352 gcc_unreachable ();
2353 return NULL;
2356 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2357 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2358 the process if necessary. Append the resulting assignment statements
2359 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2360 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2361 left shifts using additions. */
2363 static tree
2364 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2365 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2367 if (integer_zerop (op2)
2368 && (code == LSHIFT_EXPR
2369 || code == PLUS_EXPR))
2371 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2372 return op1;
2375 gimple *stmt;
2376 tree itype = TREE_TYPE (op1);
2377 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2379 if (code == LSHIFT_EXPR
2380 && synth_shift_p)
2382 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2383 stmt_vinfo);
2384 append_pattern_def_seq (stmt_vinfo, stmt);
2385 return tmp_var;
2388 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2389 append_pattern_def_seq (stmt_vinfo, stmt);
2390 return tmp_var;
2393 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2394 and simple arithmetic operations to be vectorized. Record the statements
2395 produced in STMT_VINFO and return the last statement in the sequence or
2396 NULL if it's not possible to synthesize such a multiplication.
2397 This function mirrors the behavior of expand_mult_const in expmed.c but
2398 works on tree-ssa form. */
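/* For instance, a multiplication by 10 might be synthesized as

     t1 = op << 2;
     t2 = t1 + op;
     result = t2 << 1;

   i.e. ((op * 4) + op) * 2, with the shifts themselves expanded into
   additions if the target lacks a vector shift; the exact sequence is
   whatever choose_mult_variant considers cheapest.  */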
2400 static gimple *
2401 vect_synth_mult_by_constant (tree op, tree val,
2402 stmt_vec_info stmt_vinfo)
2404 tree itype = TREE_TYPE (op);
2405 machine_mode mode = TYPE_MODE (itype);
2406 struct algorithm alg;
2407 mult_variant variant;
2408 if (!tree_fits_shwi_p (val))
2409 return NULL;
2411 /* Multiplication synthesis by shifts, adds and subs can introduce
2412 signed overflow where the original operation didn't. Perform the
2413 operations on an unsigned type and cast back to avoid this.
2414 In the future we may want to relax this for synthesis algorithms
2415 that we can prove do not cause unexpected overflow. */
2416 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2418 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2420 /* Targets that don't support vector shifts but support vector additions
2421 can synthesize shifts that way. */
2422 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2424 HOST_WIDE_INT hwval = tree_to_shwi (val);
2425 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2426 The vectorizer's benefit analysis will decide whether it's beneficial
2427 to do this. */
2428 bool possible = choose_mult_variant (mode, hwval, &alg,
2429 &variant, MAX_COST);
2430 if (!possible)
2431 return NULL;
2433 tree vectype = get_vectype_for_scalar_type (multtype);
2435 if (!vectype
2436 || !target_supports_mult_synth_alg (&alg, variant,
2437 vectype, synth_shift_p))
2438 return NULL;
2440 tree accumulator;
2442 /* Clear out the sequence of statements so we can populate it below. */
2443 gimple *stmt = NULL;
2445 if (cast_to_unsigned_p)
2447 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2448 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2449 append_pattern_def_seq (stmt_vinfo, stmt);
2450 op = tmp_op;
2453 if (alg.op[0] == alg_zero)
2454 accumulator = build_int_cst (multtype, 0);
2455 else
2456 accumulator = op;
2458 bool needs_fixup = (variant == negate_variant)
2459 || (variant == add_variant);
2461 for (int i = 1; i < alg.ops; i++)
2463 tree shft_log = build_int_cst (multtype, alg.log[i]);
2464 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2465 tree tmp_var = NULL_TREE;
2467 switch (alg.op[i])
2469 case alg_shift:
2470 if (synth_shift_p)
2471 stmt
2472 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2473 stmt_vinfo);
2474 else
2475 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2476 shft_log);
2477 break;
2478 case alg_add_t_m2:
2479 tmp_var
2480 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2481 stmt_vinfo, synth_shift_p);
2482 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2483 tmp_var);
2484 break;
2485 case alg_sub_t_m2:
2486 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2487 shft_log, stmt_vinfo,
2488 synth_shift_p);
2489 /* In some algorithms the first step involves zeroing the
2490 accumulator. If subtracting from such an accumulator
2491 just emit the negation directly. */
2492 if (integer_zerop (accumulator))
2493 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2494 else
2495 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2496 tmp_var);
2497 break;
2498 case alg_add_t2_m:
2499 tmp_var
2500 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2501 stmt_vinfo, synth_shift_p);
2502 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2503 break;
2504 case alg_sub_t2_m:
2505 tmp_var
2506 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2507 stmt_vinfo, synth_shift_p);
2508 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2509 break;
2510 case alg_add_factor:
2511 tmp_var
2512 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2513 stmt_vinfo, synth_shift_p);
2514 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2515 tmp_var);
2516 break;
2517 case alg_sub_factor:
2518 tmp_var
2519 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2520 stmt_vinfo, synth_shift_p);
2521 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2522 accumulator);
2523 break;
2524 default:
2525 gcc_unreachable ();
2527 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2528 but rather return it directly. */
2530 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2531 append_pattern_def_seq (stmt_vinfo, stmt);
2532 accumulator = accum_tmp;
2534 if (variant == negate_variant)
2536 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2537 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2538 accumulator = accum_tmp;
2539 if (cast_to_unsigned_p)
2540 append_pattern_def_seq (stmt_vinfo, stmt);
2542 else if (variant == add_variant)
2544 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2545 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2546 accumulator = accum_tmp;
2547 if (cast_to_unsigned_p)
2548 append_pattern_def_seq (stmt_vinfo, stmt);
2550 /* Move back to a signed type if needed. */
2551 if (cast_to_unsigned_p)
2553 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2554 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2557 return stmt;
2560 /* Detect multiplication by constant and convert it into a sequence of
2561 shifts, additions, subtractions and negations. We reuse the
2562 choose_mult_variant algorithms from expmed.c.
2564 Input/Output:
2566 STMT_VINFO: The stmt from which the pattern search begins,
2567 i.e. the mult stmt.
2569 Output:
2571 * TYPE_OUT: The type of the output of this pattern.
2573 * Return value: A new stmt that will be used to replace
2574 the multiplication. */
2576 static gimple *
2577 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2579 gimple *last_stmt = stmt_vinfo->stmt;
2580 tree oprnd0, oprnd1, vectype, itype;
2581 gimple *pattern_stmt;
2583 if (!is_gimple_assign (last_stmt))
2584 return NULL;
2586 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2587 return NULL;
2589 oprnd0 = gimple_assign_rhs1 (last_stmt);
2590 oprnd1 = gimple_assign_rhs2 (last_stmt);
2591 itype = TREE_TYPE (oprnd0);
2593 if (TREE_CODE (oprnd0) != SSA_NAME
2594 || TREE_CODE (oprnd1) != INTEGER_CST
2595 || !INTEGRAL_TYPE_P (itype)
2596 || !type_has_mode_precision_p (itype))
2597 return NULL;
2599 vectype = get_vectype_for_scalar_type (itype);
2600 if (vectype == NULL_TREE)
2601 return NULL;
2603 /* If the target can handle vectorized multiplication natively,
2604 don't attempt to optimize this. */
2605 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2606 if (mul_optab != unknown_optab)
2608 machine_mode vec_mode = TYPE_MODE (vectype);
2609 int icode = (int) optab_handler (mul_optab, vec_mode);
2610 if (icode != CODE_FOR_nothing)
2611 return NULL;
2614 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2615 if (!pattern_stmt)
2616 return NULL;
2618 /* Pattern detected. */
2619 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2621 *type_out = vectype;
2623 return pattern_stmt;
2626 /* Detect a signed division by a constant that wouldn't be
2627 otherwise vectorized:
2629 type a_t, b_t;
2631 S1 a_t = b_t / N;
2633 where type 'type' is an integral type and N is a constant.
2635 Similarly handle modulo by a constant:
2637 S4 a_t = b_t % N;
2639 Input/Output:
2641 * STMT_VINFO: The stmt from which the pattern search begins,
2642 i.e. the division stmt. S1 is replaced, if N is a power
2643 of two constant and type is signed, by:
2644 S3 y_t = b_t < 0 ? N - 1 : 0;
2645 S2 x_t = b_t + y_t;
2646 S1' a_t = x_t >> log2 (N);
2648 S4 is replaced, if N is a power of two constant and
2649 type is signed, by (where *_T temporaries have unsigned type):
2650 S9 y_T = b_t < 0 ? -1U : 0U;
2651 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2652 S7 z_t = (type) z_T;
2653 S6 w_t = b_t + z_t;
2654 S5 x_t = w_t & (N - 1);
2655 S4' a_t = x_t - z_t;
2657 Output:
2659 * TYPE_OUT: The type of the output of this pattern.
2661 * Return value: A new stmt that will be used to replace the division
2662 S1 or modulo S4 stmt. */
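/* For example, with signed int b_t, the division

     a_t = b_t / 4;

   becomes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   which still rounds towards zero for negative dividends.  */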
2664 static gimple *
2665 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2667 gimple *last_stmt = stmt_vinfo->stmt;
2668 tree oprnd0, oprnd1, vectype, itype, cond;
2669 gimple *pattern_stmt, *def_stmt;
2670 enum tree_code rhs_code;
2671 optab optab;
2672 tree q;
2673 int dummy_int, prec;
2675 if (!is_gimple_assign (last_stmt))
2676 return NULL;
2678 rhs_code = gimple_assign_rhs_code (last_stmt);
2679 switch (rhs_code)
2681 case TRUNC_DIV_EXPR:
2682 case EXACT_DIV_EXPR:
2683 case TRUNC_MOD_EXPR:
2684 break;
2685 default:
2686 return NULL;
2689 oprnd0 = gimple_assign_rhs1 (last_stmt);
2690 oprnd1 = gimple_assign_rhs2 (last_stmt);
2691 itype = TREE_TYPE (oprnd0);
2692 if (TREE_CODE (oprnd0) != SSA_NAME
2693 || TREE_CODE (oprnd1) != INTEGER_CST
2694 || TREE_CODE (itype) != INTEGER_TYPE
2695 || !type_has_mode_precision_p (itype))
2696 return NULL;
2698 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2699 vectype = get_vectype_for_scalar_type (itype);
2700 if (vectype == NULL_TREE)
2701 return NULL;
2703 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2705 /* If the target can handle vectorized division or modulo natively,
2706 don't attempt to optimize this, since native division is likely
2707 to give smaller code. */
2708 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2709 if (optab != unknown_optab)
2711 machine_mode vec_mode = TYPE_MODE (vectype);
2712 int icode = (int) optab_handler (optab, vec_mode);
2713 if (icode != CODE_FOR_nothing)
2714 return NULL;
2718 prec = TYPE_PRECISION (itype);
2719 if (integer_pow2p (oprnd1))
2721 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2722 return NULL;
2724 /* Pattern detected. */
2725 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2727 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2728 build_int_cst (itype, 0));
2729 if (rhs_code == TRUNC_DIV_EXPR
2730 || rhs_code == EXACT_DIV_EXPR)
2732 tree var = vect_recog_temp_ssa_var (itype, NULL);
2733 tree shift;
2734 def_stmt
2735 = gimple_build_assign (var, COND_EXPR, cond,
2736 fold_build2 (MINUS_EXPR, itype, oprnd1,
2737 build_int_cst (itype, 1)),
2738 build_int_cst (itype, 0));
2739 append_pattern_def_seq (stmt_vinfo, def_stmt);
2740 var = vect_recog_temp_ssa_var (itype, NULL);
2741 def_stmt
2742 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2743 gimple_assign_lhs (def_stmt));
2744 append_pattern_def_seq (stmt_vinfo, def_stmt);
2746 shift = build_int_cst (itype, tree_log2 (oprnd1));
2747 pattern_stmt
2748 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2749 RSHIFT_EXPR, var, shift);
2751 else
2753 tree signmask;
2754 if (compare_tree_int (oprnd1, 2) == 0)
2756 signmask = vect_recog_temp_ssa_var (itype, NULL);
2757 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2758 build_int_cst (itype, 1),
2759 build_int_cst (itype, 0));
2760 append_pattern_def_seq (stmt_vinfo, def_stmt);
2762 else
2764 tree utype
2765 = build_nonstandard_integer_type (prec, 1);
2766 tree vecutype = get_vectype_for_scalar_type (utype);
2767 tree shift
2768 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2769 - tree_log2 (oprnd1));
2770 tree var = vect_recog_temp_ssa_var (utype, NULL);
2772 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2773 build_int_cst (utype, -1),
2774 build_int_cst (utype, 0));
2775 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2776 var = vect_recog_temp_ssa_var (utype, NULL);
2777 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2778 gimple_assign_lhs (def_stmt),
2779 shift);
2780 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2781 signmask = vect_recog_temp_ssa_var (itype, NULL);
2782 def_stmt
2783 = gimple_build_assign (signmask, NOP_EXPR, var);
2784 append_pattern_def_seq (stmt_vinfo, def_stmt);
2786 def_stmt
2787 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2788 PLUS_EXPR, oprnd0, signmask);
2789 append_pattern_def_seq (stmt_vinfo, def_stmt);
2790 def_stmt
2791 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2792 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2793 fold_build2 (MINUS_EXPR, itype, oprnd1,
2794 build_int_cst (itype, 1)));
2795 append_pattern_def_seq (stmt_vinfo, def_stmt);
2797 pattern_stmt
2798 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2799 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2800 signmask);
2803 *type_out = vectype;
2804 return pattern_stmt;
2807 if (prec > HOST_BITS_PER_WIDE_INT
2808 || integer_zerop (oprnd1))
2809 return NULL;
2811 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2812 return NULL;
2814 if (TYPE_UNSIGNED (itype))
2816 unsigned HOST_WIDE_INT mh, ml;
2817 int pre_shift, post_shift;
2818 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2819 & GET_MODE_MASK (itype_mode));
2820 tree t1, t2, t3, t4;
2822 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2823 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2824 return NULL;
2826 /* Find a suitable multiplier and right shift count
2827 instead of multiplying with D. */
2828 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2830 /* If the suggested multiplier is more than SIZE bits, we can do better
2831 for even divisors, using an initial right shift. */
2832 if (mh != 0 && (d & 1) == 0)
2834 pre_shift = ctz_or_zero (d);
2835 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2836 &ml, &post_shift, &dummy_int);
2837 gcc_assert (!mh);
2839 else
2840 pre_shift = 0;
2842 if (mh != 0)
2844 if (post_shift - 1 >= prec)
2845 return NULL;
2847 /* t1 = oprnd0 h* ml;
2848 t2 = oprnd0 - t1;
2849 t3 = t2 >> 1;
2850 t4 = t1 + t3;
2851 q = t4 >> (post_shift - 1); */
2852 t1 = vect_recog_temp_ssa_var (itype, NULL);
2853 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2854 build_int_cst (itype, ml));
2855 append_pattern_def_seq (stmt_vinfo, def_stmt);
2857 t2 = vect_recog_temp_ssa_var (itype, NULL);
2858 def_stmt
2859 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2860 append_pattern_def_seq (stmt_vinfo, def_stmt);
2862 t3 = vect_recog_temp_ssa_var (itype, NULL);
2863 def_stmt
2864 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2865 append_pattern_def_seq (stmt_vinfo, def_stmt);
2867 t4 = vect_recog_temp_ssa_var (itype, NULL);
2868 def_stmt
2869 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2871 if (post_shift != 1)
2873 append_pattern_def_seq (stmt_vinfo, def_stmt);
2875 q = vect_recog_temp_ssa_var (itype, NULL);
2876 pattern_stmt
2877 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2878 build_int_cst (itype, post_shift - 1));
2880 else
2882 q = t4;
2883 pattern_stmt = def_stmt;
2886 else
2888 if (pre_shift >= prec || post_shift >= prec)
2889 return NULL;
2891 /* t1 = oprnd0 >> pre_shift;
2892 t2 = t1 h* ml;
2893 q = t2 >> post_shift; */
2894 if (pre_shift)
2896 t1 = vect_recog_temp_ssa_var (itype, NULL);
2897 def_stmt
2898 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2899 build_int_cst (NULL, pre_shift));
2900 append_pattern_def_seq (stmt_vinfo, def_stmt);
2902 else
2903 t1 = oprnd0;
2905 t2 = vect_recog_temp_ssa_var (itype, NULL);
2906 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2907 build_int_cst (itype, ml));
2909 if (post_shift)
2911 append_pattern_def_seq (stmt_vinfo, def_stmt);
2913 q = vect_recog_temp_ssa_var (itype, NULL);
2914 def_stmt
2915 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2916 build_int_cst (itype, post_shift));
2918 else
2919 q = t2;
2921 pattern_stmt = def_stmt;
2924 else
2926 unsigned HOST_WIDE_INT ml;
2927 int post_shift;
2928 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2929 unsigned HOST_WIDE_INT abs_d;
2930 bool add = false;
2931 tree t1, t2, t3, t4;
2933 /* Give up for -1. */
2934 if (d == -1)
2935 return NULL;
2937 /* Since d might be INT_MIN, we have to cast to
2938 unsigned HOST_WIDE_INT before negating to avoid
2939 undefined signed overflow. */
2940 abs_d = (d >= 0
2941 ? (unsigned HOST_WIDE_INT) d
2942 : - (unsigned HOST_WIDE_INT) d);
2944 /* n rem d = n rem -d */
2945 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2947 d = abs_d;
2948 oprnd1 = build_int_cst (itype, abs_d);
2950 else if (HOST_BITS_PER_WIDE_INT >= prec
2951 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2952 /* This case is not handled correctly below. */
2953 return NULL;
2955 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2956 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2958 add = true;
2959 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2961 if (post_shift >= prec)
2962 return NULL;
2964 /* t1 = oprnd0 h* ml; */
2965 t1 = vect_recog_temp_ssa_var (itype, NULL);
2966 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2967 build_int_cst (itype, ml));
2969 if (add)
2971 /* t2 = t1 + oprnd0; */
2972 append_pattern_def_seq (stmt_vinfo, def_stmt);
2973 t2 = vect_recog_temp_ssa_var (itype, NULL);
2974 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2976 else
2977 t2 = t1;
2979 if (post_shift)
2981 /* t3 = t2 >> post_shift; */
2982 append_pattern_def_seq (stmt_vinfo, def_stmt);
2983 t3 = vect_recog_temp_ssa_var (itype, NULL);
2984 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2985 build_int_cst (itype, post_shift));
2987 else
2988 t3 = t2;
2990 wide_int oprnd0_min, oprnd0_max;
2991 int msb = 1;
2992 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2994 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2995 msb = 0;
2996 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2997 msb = -1;
3000 if (msb == 0 && d >= 0)
3002 /* q = t3; */
3003 q = t3;
3004 pattern_stmt = def_stmt;
3006 else
3008 /* t4 = oprnd0 >> (prec - 1);
3009 or if we know from VRP that oprnd0 >= 0
3010 t4 = 0;
3011 or if we know from VRP that oprnd0 < 0
3012 t4 = -1; */
3013 append_pattern_def_seq (stmt_vinfo, def_stmt);
3014 t4 = vect_recog_temp_ssa_var (itype, NULL);
3015 if (msb != 1)
3016 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3017 build_int_cst (itype, msb));
3018 else
3019 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3020 build_int_cst (itype, prec - 1));
3021 append_pattern_def_seq (stmt_vinfo, def_stmt);
3023 /* q = t3 - t4; or q = t4 - t3; */
3024 q = vect_recog_temp_ssa_var (itype, NULL);
3025 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3026 d < 0 ? t3 : t4);
3030 if (rhs_code == TRUNC_MOD_EXPR)
3032 tree r, t1;
3034 /* We divided. Now finish by:
3035 t1 = q * oprnd1;
3036 r = oprnd0 - t1; */
3037 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3039 t1 = vect_recog_temp_ssa_var (itype, NULL);
3040 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3041 append_pattern_def_seq (stmt_vinfo, def_stmt);
3043 r = vect_recog_temp_ssa_var (itype, NULL);
3044 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3047 /* Pattern detected. */
3048 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3050 *type_out = vectype;
3051 return pattern_stmt;
3054 /* Function vect_recog_mixed_size_cond_pattern
3056 Try to find the following pattern:
3058 type x_t, y_t;
3059 TYPE a_T, b_T, c_T;
3060 loop:
3061 S1 a_T = x_t CMP y_t ? b_T : c_T;
3063 where type 'TYPE' is an integral type which has a different size
3064 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3065 than 'type', the constants need to fit into an integer type
3066 with the same width as 'type') or results of conversion from 'type'.
3068 Input:
3070 * STMT_VINFO: The stmt from which the pattern search begins.
3072 Output:
3074 * TYPE_OUT: The type of the output of this pattern.
3076 * Return value: A new stmt that will be used to replace the pattern.
3077 Additionally a def_stmt is added.
3079 a_it = x_t CMP y_t ? b_it : c_it;
3080 a_T = (TYPE) a_it; */
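/* For instance (illustrative types), with char operands x_t, y_t and an
   int result a_T:

     a_T = x_t < y_t ? 5 : 10;

   becomes a COND_EXPR on a char-sized integer type followed by a widening:

     a_it = x_t < y_t ? 5 : 10;
     a_T = (TYPE) a_it;

   so that the selection operates on elements of the same size as the
   comparison.  */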
3082 static gimple *
3083 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3085 gimple *last_stmt = stmt_vinfo->stmt;
3086 tree cond_expr, then_clause, else_clause;
3087 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3088 gimple *pattern_stmt, *def_stmt;
3089 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3090 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3091 bool promotion;
3092 tree comp_scalar_type;
3094 if (!is_gimple_assign (last_stmt)
3095 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3096 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3097 return NULL;
3099 cond_expr = gimple_assign_rhs1 (last_stmt);
3100 then_clause = gimple_assign_rhs2 (last_stmt);
3101 else_clause = gimple_assign_rhs3 (last_stmt);
3103 if (!COMPARISON_CLASS_P (cond_expr))
3104 return NULL;
3106 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3107 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3108 if (comp_vectype == NULL_TREE)
3109 return NULL;
3111 type = gimple_expr_type (last_stmt);
3112 if (types_compatible_p (type, comp_scalar_type)
3113 || ((TREE_CODE (then_clause) != INTEGER_CST
3114 || TREE_CODE (else_clause) != INTEGER_CST)
3115 && !INTEGRAL_TYPE_P (comp_scalar_type))
3116 || !INTEGRAL_TYPE_P (type))
3117 return NULL;
3119 if ((TREE_CODE (then_clause) != INTEGER_CST
3120 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3121 &def_stmt0, &promotion))
3122 || (TREE_CODE (else_clause) != INTEGER_CST
3123 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3124 &def_stmt1, &promotion)))
3125 return NULL;
3127 if (orig_type0 && orig_type1
3128 && !types_compatible_p (orig_type0, orig_type1))
3129 return NULL;
3131 if (orig_type0)
3133 if (!types_compatible_p (orig_type0, comp_scalar_type))
3134 return NULL;
3135 then_clause = gimple_assign_rhs1 (def_stmt0);
3136 itype = orig_type0;
3139 if (orig_type1)
3141 if (!types_compatible_p (orig_type1, comp_scalar_type))
3142 return NULL;
3143 else_clause = gimple_assign_rhs1 (def_stmt1);
3144 itype = orig_type1;
3148 HOST_WIDE_INT cmp_mode_size
3149 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3151 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3152 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3153 return NULL;
3155 vectype = get_vectype_for_scalar_type (type);
3156 if (vectype == NULL_TREE)
3157 return NULL;
3159 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3160 return NULL;
3162 if (itype == NULL_TREE)
3163 itype = build_nonstandard_integer_type (cmp_mode_size,
3164 TYPE_UNSIGNED (type));
3166 if (itype == NULL_TREE
3167 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3168 return NULL;
3170 vecitype = get_vectype_for_scalar_type (itype);
3171 if (vecitype == NULL_TREE)
3172 return NULL;
3174 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3175 return NULL;
3177 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3179 if ((TREE_CODE (then_clause) == INTEGER_CST
3180 && !int_fits_type_p (then_clause, itype))
3181 || (TREE_CODE (else_clause) == INTEGER_CST
3182 && !int_fits_type_p (else_clause, itype)))
3183 return NULL;
3186 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3187 COND_EXPR, unshare_expr (cond_expr),
3188 fold_convert (itype, then_clause),
3189 fold_convert (itype, else_clause));
3190 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3191 NOP_EXPR, gimple_assign_lhs (def_stmt));
3193 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3194 *type_out = vectype;
3196 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3198 return pattern_stmt;
3202 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3203 true if bool VAR can and should be optimized that way. Assume it shouldn't
3204 in case it's a result of a comparison which can be directly vectorized into
3205 a vector comparison. Fills in STMTS with all stmts visited during the
3206 walk. */
3208 static bool
3209 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3211 tree rhs1;
3212 enum tree_code rhs_code;
3214 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3215 if (!def_stmt_info)
3216 return false;
3218 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3219 if (!def_stmt)
3220 return false;
3222 if (stmts.contains (def_stmt))
3223 return true;
3225 rhs1 = gimple_assign_rhs1 (def_stmt);
3226 rhs_code = gimple_assign_rhs_code (def_stmt);
3227 switch (rhs_code)
3229 case SSA_NAME:
3230 if (! check_bool_pattern (rhs1, vinfo, stmts))
3231 return false;
3232 break;
3234 CASE_CONVERT:
3235 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3236 return false;
3237 if (! check_bool_pattern (rhs1, vinfo, stmts))
3238 return false;
3239 break;
3241 case BIT_NOT_EXPR:
3242 if (! check_bool_pattern (rhs1, vinfo, stmts))
3243 return false;
3244 break;
3246 case BIT_AND_EXPR:
3247 case BIT_IOR_EXPR:
3248 case BIT_XOR_EXPR:
3249 if (! check_bool_pattern (rhs1, vinfo, stmts)
3250 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3251 return false;
3252 break;
3254 default:
3255 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3257 tree vecitype, comp_vectype;
3259 /* If the comparison can throw, then is_gimple_condexpr will be
3260 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3261 if (stmt_could_throw_p (def_stmt))
3262 return false;
3264 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3265 if (comp_vectype == NULL_TREE)
3266 return false;
3268 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3269 if (mask_type
3270 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3271 return false;
3273 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3275 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3276 tree itype
3277 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3278 vecitype = get_vectype_for_scalar_type (itype);
3279 if (vecitype == NULL_TREE)
3280 return false;
3282 else
3283 vecitype = comp_vectype;
3284 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3285 return false;
3287 else
3288 return false;
3289 break;
3292 bool res = stmts.add (def_stmt);
3293 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3294 gcc_assert (!res);
3296 return true;
3300 /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
3301 previous stmt) to TYPE, appending the cast stmt to STMT_INFO's
3302 pattern def sequence. Return the result of the cast. */
3304 static tree
3305 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3307 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3308 NOP_EXPR, var);
3309 append_pattern_def_seq (stmt_info, cast_stmt,
3310 get_vectype_for_scalar_type (type));
3311 return gimple_assign_lhs (cast_stmt);
3314 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3315 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3316 type, OUT_TYPE is the desired final integer type of the whole pattern.
3317 STMT_INFO is the info of the pattern root and is what pattern stmts should
3318 be associated with. DEFS is a map of pattern defs. */
3320 static void
3321 adjust_bool_pattern (tree var, tree out_type,
3322 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3324 gimple *stmt = SSA_NAME_DEF_STMT (var);
3325 enum tree_code rhs_code, def_rhs_code;
3326 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3327 location_t loc;
3328 gimple *pattern_stmt, *def_stmt;
3329 tree trueval = NULL_TREE;
3331 rhs1 = gimple_assign_rhs1 (stmt);
3332 rhs2 = gimple_assign_rhs2 (stmt);
3333 rhs_code = gimple_assign_rhs_code (stmt);
3334 loc = gimple_location (stmt);
3335 switch (rhs_code)
3337 case SSA_NAME:
3338 CASE_CONVERT:
3339 irhs1 = *defs.get (rhs1);
3340 itype = TREE_TYPE (irhs1);
3341 pattern_stmt
3342 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3343 SSA_NAME, irhs1);
3344 break;
3346 case BIT_NOT_EXPR:
3347 irhs1 = *defs.get (rhs1);
3348 itype = TREE_TYPE (irhs1);
3349 pattern_stmt
3350 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3351 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3352 break;
3354 case BIT_AND_EXPR:
3355 /* Try to optimize x = y & (a < b ? 1 : 0); into
3356 x = (a < b ? y : 0);
3358 E.g. for:
3359 bool a_b, b_b, c_b;
3360 TYPE d_T;
3362 S1 a_b = x1 CMP1 y1;
3363 S2 b_b = x2 CMP2 y2;
3364 S3 c_b = a_b & b_b;
3365 S4 d_T = (TYPE) c_b;
3367 we would normally emit:
3369 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3370 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3371 S3' c_T = a_T & b_T;
3372 S4' d_T = c_T;
3374 but we can save one stmt by using the
3375 result of one of the COND_EXPRs in the other COND_EXPR and leave
3376 BIT_AND_EXPR stmt out:
3378 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3379 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3380 S4' f_T = c_T;
3382 At least when VEC_COND_EXPR is implemented using masks,
3383 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3384 computes the comparison mask and ANDs it, in one case with an
3385 all-ones vector, in the other case with a vector register.
3386 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3387 often more expensive. */
3388 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3389 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3390 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3392 irhs1 = *defs.get (rhs1);
3393 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3394 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3395 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3397 rhs_code = def_rhs_code;
3398 rhs1 = def_rhs1;
3399 rhs2 = gimple_assign_rhs2 (def_stmt);
3400 trueval = irhs1;
3401 goto do_compare;
3403 else
3404 irhs2 = *defs.get (rhs2);
3405 goto and_ior_xor;
3407 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3408 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3409 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3411 irhs2 = *defs.get (rhs2);
3412 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3413 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3414 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3416 rhs_code = def_rhs_code;
3417 rhs1 = def_rhs1;
3418 rhs2 = gimple_assign_rhs2 (def_stmt);
3419 trueval = irhs2;
3420 goto do_compare;
3422 else
3423 irhs1 = *defs.get (rhs1);
3424 goto and_ior_xor;
3426 /* FALLTHRU */
3427 case BIT_IOR_EXPR:
3428 case BIT_XOR_EXPR:
3429 irhs1 = *defs.get (rhs1);
3430 irhs2 = *defs.get (rhs2);
3431 and_ior_xor:
3432 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3433 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3435 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3436 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3437 int out_prec = TYPE_PRECISION (out_type);
3438 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3439 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3440 stmt_info);
3441 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3442 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3443 stmt_info);
3444 else
3446 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3447 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3450 itype = TREE_TYPE (irhs1);
3451 pattern_stmt
3452 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3453 rhs_code, irhs1, irhs2);
3454 break;
3456 default:
3457 do_compare:
3458 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3459 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3460 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3461 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3462 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3464 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3465 itype
3466 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3468 else
3469 itype = TREE_TYPE (rhs1);
3470 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3471 if (trueval == NULL_TREE)
3472 trueval = build_int_cst (itype, 1);
3473 else
3474 gcc_checking_assert (useless_type_conversion_p (itype,
3475 TREE_TYPE (trueval)));
3476 pattern_stmt
3477 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3478 COND_EXPR, cond_expr, trueval,
3479 build_int_cst (itype, 0));
3480 break;
3483 gimple_set_location (pattern_stmt, loc);
3484 append_pattern_def_seq (stmt_info, pattern_stmt,
3485 get_vectype_for_scalar_type (itype));
3486 defs.put (var, gimple_assign_lhs (pattern_stmt));
3489 /* Comparison function for qsorting a vector of gimple stmts by UID. */
3491 static int
3492 sort_after_uid (const void *p1, const void *p2)
3494 const gimple *stmt1 = *(const gimple * const *)p1;
3495 const gimple *stmt2 = *(const gimple * const *)p2;
3496 return gimple_uid (stmt1) - gimple_uid (stmt2);
3499 /* Create pattern stmts for all stmts participating in the bool pattern
3500 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3501 OUT_TYPE. Return the def of the pattern root. */
3503 static tree
3504 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3505 tree out_type, stmt_vec_info stmt_info)
3507 /* Gather original stmts in the bool pattern in their order of appearance
3508 in the IL. */
3509 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3510 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3511 i != bool_stmt_set.end (); ++i)
3512 bool_stmts.quick_push (*i);
3513 bool_stmts.qsort (sort_after_uid);
3515 /* Now process them in that order, producing pattern stmts. */
3516 hash_map <tree, tree> defs;
3517 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3518 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3519 out_type, stmt_info, defs);
3521 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3522 gimple *pattern_stmt
3523 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3524 return gimple_assign_lhs (pattern_stmt);
3527 /* Helper for search_type_for_mask. */
3529 static tree
3530 search_type_for_mask_1 (tree var, vec_info *vinfo,
3531 hash_map<gimple *, tree> &cache)
3533 tree rhs1;
3534 enum tree_code rhs_code;
3535 tree res = NULL_TREE, res2;
3537 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3538 return NULL_TREE;
3540 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3541 if (!def_stmt_info)
3542 return NULL_TREE;
3544 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3545 if (!def_stmt)
3546 return NULL_TREE;
3548 tree *c = cache.get (def_stmt);
3549 if (c)
3550 return *c;
3552 rhs_code = gimple_assign_rhs_code (def_stmt);
3553 rhs1 = gimple_assign_rhs1 (def_stmt);
3555 switch (rhs_code)
3557 case SSA_NAME:
3558 case BIT_NOT_EXPR:
3559 CASE_CONVERT:
3560 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3561 break;
3563 case BIT_AND_EXPR:
3564 case BIT_IOR_EXPR:
3565 case BIT_XOR_EXPR:
3566 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3567 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3568 cache);
3569 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3570 res = res2;
3571 break;
3573 default:
3574 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3576 tree comp_vectype, mask_type;
3578 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3580 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3581 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3582 vinfo, cache);
3583 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3584 res = res2;
3585 break;
3588 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3589 if (comp_vectype == NULL_TREE)
3591 res = NULL_TREE;
3592 break;
3595 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3596 if (!mask_type
3597 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3599 res = NULL_TREE;
3600 break;
3603 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3604 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3606 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3607 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3609 else
3610 res = TREE_TYPE (rhs1);
3614 cache.put (def_stmt, res);
3615 return res;
3618 /* Return the proper type for converting bool VAR into
3619 an integer value or NULL_TREE if no such type exists.
3620 The type is chosen so that converted value has the
3621 same number of elements as VAR's vector type. */
3623 static tree
3624 search_type_for_mask (tree var, vec_info *vinfo)
3626 hash_map<gimple *, tree> cache;
3627 return search_type_for_mask_1 (var, vinfo, cache);
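/* Illustrative note (hypothetical names, assuming the target supports the
   vector comparison): for a mask defined as

     _Bool b_1 = x_2 < y_3;   where x_2 and y_3 have type "int"

   search_type_for_mask returns a 32-bit unsigned integer type, i.e. the
   width of the compared operands, so the mask can later live in vector
   elements of that size.  For a chain such as b_4 = b_1 & b_2 the narrower
   of the two operand types wins, as per the BIT_AND_EXPR case above.  */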
3630 /* Function vect_recog_bool_pattern
3632 Try to find pattern like following:
3634 bool a_b, b_b, c_b, d_b, e_b;
3635 TYPE f_T;
3636 loop:
3637 S1 a_b = x1 CMP1 y1;
3638 S2 b_b = x2 CMP2 y2;
3639 S3 c_b = a_b & b_b;
3640 S4 d_b = x3 CMP3 y3;
3641 S5 e_b = c_b | d_b;
3642 S6 f_T = (TYPE) e_b;
3644 where type 'TYPE' is an integral type. Or a similar pattern
3645 ending in
3647 S6 f_Y = e_b ? r_Y : s_Y;
3649 as it results from if-conversion of a complex condition.
3651 Input:
3653 * STMT_VINFO: The stmt at the end from which the pattern
3654 search begins, i.e. cast of a bool to
3655 an integer type.
3657 Output:
3659 * TYPE_OUT: The type of the output of this pattern.
3661 * Return value: A new stmt that will be used to replace the pattern.
3663 Assuming the size of TYPE is the same as the size of all comparisons
3664 (otherwise some casts would be added where needed), for the above
3665 sequence we create the following related pattern stmts:
3666 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3667 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3668 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3669 S5' e_T = c_T | d_T;
3670 S6' f_T = e_T;
3672 Instead of the above S3' we could emit:
3673 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3674 S3' c_T = a_T | b_T;
3675 but the above is more efficient. */
3677 static gimple *
3678 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3680 gimple *last_stmt = stmt_vinfo->stmt;
3681 enum tree_code rhs_code;
3682 tree var, lhs, rhs, vectype;
3683 vec_info *vinfo = stmt_vinfo->vinfo;
3684 gimple *pattern_stmt;
3686 if (!is_gimple_assign (last_stmt))
3687 return NULL;
3689 var = gimple_assign_rhs1 (last_stmt);
3690 lhs = gimple_assign_lhs (last_stmt);
3692 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3693 return NULL;
3695 hash_set<gimple *> bool_stmts;
3697 rhs_code = gimple_assign_rhs_code (last_stmt);
3698 if (CONVERT_EXPR_CODE_P (rhs_code))
3700 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3701 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3702 return NULL;
3703 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3704 if (vectype == NULL_TREE)
3705 return NULL;
3707 if (check_bool_pattern (var, vinfo, bool_stmts))
3709 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3710 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3711 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3712 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3713 else
3714 pattern_stmt
3715 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3717 else
3719 tree type = search_type_for_mask (var, vinfo);
3720 tree cst0, cst1, tmp;
3722 if (!type)
3723 return NULL;
3725 /* We may directly use the cond with a narrowed type to avoid
3726 multiple cond exprs with subsequent result packing, and
3727 instead perform a single cond with a packed mask. In the
3728 case of widening it is better to do the cond first and then
3729 extract the results. */
3730 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3731 type = TREE_TYPE (lhs);
3733 cst0 = build_int_cst (type, 0);
3734 cst1 = build_int_cst (type, 1);
3735 tmp = vect_recog_temp_ssa_var (type, NULL);
3736 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3738 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3740 tree new_vectype = get_vectype_for_scalar_type (type);
3741 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3743 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3744 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3748 *type_out = vectype;
3749 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3751 return pattern_stmt;
3753 else if (rhs_code == COND_EXPR
3754 && TREE_CODE (var) == SSA_NAME)
3756 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3757 if (vectype == NULL_TREE)
3758 return NULL;
3760 /* Build a scalar type for the boolean result that when
3761 vectorized matches the vector type of the result in
3762 size and number of elements. */
3763 unsigned prec
3764 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3765 TYPE_VECTOR_SUBPARTS (vectype));
3767 tree type
3768 = build_nonstandard_integer_type (prec,
3769 TYPE_UNSIGNED (TREE_TYPE (var)));
3770 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3771 return NULL;
3773 if (!check_bool_pattern (var, vinfo, bool_stmts))
3774 return NULL;
3776 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3778 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3779 pattern_stmt
3780 = gimple_build_assign (lhs, COND_EXPR,
3781 build2 (NE_EXPR, boolean_type_node,
3782 rhs, build_int_cst (type, 0)),
3783 gimple_assign_rhs2 (last_stmt),
3784 gimple_assign_rhs3 (last_stmt));
3785 *type_out = vectype;
3786 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3788 return pattern_stmt;
3790 else if (rhs_code == SSA_NAME
3791 && STMT_VINFO_DATA_REF (stmt_vinfo))
3793 stmt_vec_info pattern_stmt_info;
3794 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3795 gcc_assert (vectype != NULL_TREE);
3796 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3797 return NULL;
3799 if (check_bool_pattern (var, vinfo, bool_stmts))
3800 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3801 else
3803 tree type = search_type_for_mask (var, vinfo);
3804 tree cst0, cst1, new_vectype;
3806 if (!type)
3807 return NULL;
3809 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3810 type = TREE_TYPE (vectype);
3812 cst0 = build_int_cst (type, 0);
3813 cst1 = build_int_cst (type, 1);
3814 new_vectype = get_vectype_for_scalar_type (type);
3816 rhs = vect_recog_temp_ssa_var (type, NULL);
3817 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3818 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3821 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3822 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3824 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3825 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3826 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3827 rhs = rhs2;
3829 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3830 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3831 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3832 *type_out = vectype;
3833 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3835 return pattern_stmt;
3837 else
3838 return NULL;
3842 /* A helper for vect_recog_mask_conversion_pattern. Build
3843 conversion of MASK to a type suitable for masking VECTYPE.
3844 Built statement gets required vectype and is appended to
3845 a pattern sequence of STMT_VINFO.
3847 Return converted mask. */
3849 static tree
3850 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3852 gimple *stmt;
3853 tree masktype, tmp;
3855 masktype = build_same_sized_truth_vector_type (vectype);
3856 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3857 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3858 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3860 return tmp;
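/* Illustrative sketch (hypothetical names): for a mask m_1 and a data
   vector type VECTYPE, the helper above appends

     patt_2 = (<new boolean type>) m_1;

   to STMT_VINFO's pattern def sequence, gives it the mask vector type
   returned by build_same_sized_truth_vector_type (VECTYPE), and returns
   patt_2 for use in place of m_1.  */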
3864 /* Function vect_recog_mask_conversion_pattern
3866 Try to find statements which require boolean type
3867 conversion. Additional conversion statements are
3868 added to handle such cases. For example:
3870 bool m_1, m_2, m_3;
3871 int i_4, i_5;
3872 double d_6, d_7;
3873 char c_1, c_2, c_3;
3875 S1 m_1 = i_4 > i_5;
3876 S2 m_2 = d_6 < d_7;
3877 S3 m_3 = m_1 & m_2;
3878 S4 c_1 = m_3 ? c_2 : c_3;
3880 Will be transformed into:
3882 S1 m_1 = i_4 > i_5;
3883 S2 m_2 = d_6 < d_7;
3884 S3'' m_2' = (_Bool[bitsize=32])m_2
3885 S3' m_3' = m_1 & m_2';
3886 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3887 S4' c_1' = m_3'' ? c_2 : c_3; */
3889 static gimple *
3890 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3892 gimple *last_stmt = stmt_vinfo->stmt;
3893 enum tree_code rhs_code;
3894 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3895 tree vectype1, vectype2;
3896 stmt_vec_info pattern_stmt_info;
3897 vec_info *vinfo = stmt_vinfo->vinfo;
3899 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3900 if (is_gimple_call (last_stmt)
3901 && gimple_call_internal_p (last_stmt))
3903 gcall *pattern_stmt;
3905 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3906 int mask_argno = internal_fn_mask_index (ifn);
3907 if (mask_argno < 0)
3908 return NULL;
3910 bool store_p = internal_store_fn_p (ifn);
3911 if (store_p)
3913 int rhs_index = internal_fn_stored_value_index (ifn);
3914 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3915 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3917 else
3919 lhs = gimple_call_lhs (last_stmt);
3920 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3923 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3924 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3925 if (!mask_arg_type)
3926 return NULL;
3927 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3929 if (!vectype1 || !vectype2
3930 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3931 TYPE_VECTOR_SUBPARTS (vectype2)))
3932 return NULL;
3934 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3936 auto_vec<tree, 8> args;
3937 unsigned int nargs = gimple_call_num_args (last_stmt);
3938 args.safe_grow (nargs);
3939 for (unsigned int i = 0; i < nargs; ++i)
3940 args[i] = ((int) i == mask_argno
3941 ? tmp
3942 : gimple_call_arg (last_stmt, i));
3943 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3945 if (!store_p)
3947 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3948 gimple_call_set_lhs (pattern_stmt, lhs);
3950 gimple_call_set_nothrow (pattern_stmt, true);
3952 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3953 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3954 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3956 *type_out = vectype1;
3957 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3959 return pattern_stmt;
3962 if (!is_gimple_assign (last_stmt))
3963 return NULL;
3965 gimple *pattern_stmt;
3966 lhs = gimple_assign_lhs (last_stmt);
3967 rhs1 = gimple_assign_rhs1 (last_stmt);
3968 rhs_code = gimple_assign_rhs_code (last_stmt);
3970 /* Check for cond expression requiring mask conversion. */
3971 if (rhs_code == COND_EXPR)
3973 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3975 if (TREE_CODE (rhs1) == SSA_NAME)
3977 rhs1_type = search_type_for_mask (rhs1, vinfo);
3978 if (!rhs1_type)
3979 return NULL;
3981 else if (COMPARISON_CLASS_P (rhs1))
3983 /* Check whether we're comparing scalar booleans and (if so)
3984 whether a better mask type exists than the mask associated
3985 with boolean-sized elements. This avoids unnecessary packs
3986 and unpacks if the booleans are set from comparisons of
3987 wider types. E.g. in:
3989 int x1, x2, x3, x4, y1, y2;
3991 bool b1 = (x1 == x2);
3992 bool b2 = (x3 == x4);
3993 ... = b1 == b2 ? y1 : y2;
3995 it is better for b1 and b2 to use the mask type associated
3996 with int elements rather than bool (byte) elements. */
3997 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3998 if (!rhs1_type)
3999 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4001 else
4002 return NULL;
4004 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4006 if (!vectype1 || !vectype2)
4007 return NULL;
4009 /* Continue if a conversion is needed. Also continue if we have
4010 a comparison whose vector type would normally be different from
4011 VECTYPE2 when considered in isolation. In that case we'll
4012 replace the comparison with an SSA name (so that we can record
4013 its vector type) and behave as though the comparison was an SSA
4014 name from the outset. */
4015 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4016 TYPE_VECTOR_SUBPARTS (vectype2))
4017 && (TREE_CODE (rhs1) == SSA_NAME
4018 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4019 return NULL;
4021 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4022 in place, we can handle it in vectorizable_condition. This avoids
4023 unnecessary promotion stmts and increased vectorization factor. */
4024 if (COMPARISON_CLASS_P (rhs1)
4025 && INTEGRAL_TYPE_P (rhs1_type)
4026 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4027 TYPE_VECTOR_SUBPARTS (vectype2)))
4029 enum vect_def_type dt;
4030 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4031 && dt == vect_external_def
4032 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4033 && (dt == vect_external_def
4034 || dt == vect_constant_def))
4036 tree wide_scalar_type = build_nonstandard_integer_type
4037 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4038 TYPE_UNSIGNED (rhs1_type));
4039 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4040 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4041 return NULL;
4045 /* If rhs1 is a comparison we need to move it into a
4046 separate statement. */
4047 if (TREE_CODE (rhs1) != SSA_NAME)
4049 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4050 pattern_stmt = gimple_build_assign (tmp, rhs1);
4051 rhs1 = tmp;
4052 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4055 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4056 TYPE_VECTOR_SUBPARTS (vectype2)))
4057 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4058 else
4059 tmp = rhs1;
4061 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4062 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4063 gimple_assign_rhs2 (last_stmt),
4064 gimple_assign_rhs3 (last_stmt));
4066 *type_out = vectype1;
4067 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4069 return pattern_stmt;
4072 /* Now check for binary boolean operations requiring conversion for
4073 one of operands. */
4074 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4075 return NULL;
4077 if (rhs_code != BIT_IOR_EXPR
4078 && rhs_code != BIT_XOR_EXPR
4079 && rhs_code != BIT_AND_EXPR
4080 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4081 return NULL;
4083 rhs2 = gimple_assign_rhs2 (last_stmt);
4085 rhs1_type = search_type_for_mask (rhs1, vinfo);
4086 rhs2_type = search_type_for_mask (rhs2, vinfo);
4088 if (!rhs1_type || !rhs2_type
4089 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4090 return NULL;
4092 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4094 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4095 if (!vectype1)
4096 return NULL;
4097 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4099 else
4101 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4102 if (!vectype1)
4103 return NULL;
4104 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4107 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4108 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4110 *type_out = vectype1;
4111 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4113 return pattern_stmt;
4116 /* STMT_INFO is a load or store. If the load or store is conditional, return
4117 the boolean condition under which it occurs, otherwise return null. */
4119 static tree
4120 vect_get_load_store_mask (stmt_vec_info stmt_info)
4122 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4124 gcc_assert (gimple_assign_single_p (def_assign));
4125 return NULL_TREE;
4128 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4130 internal_fn ifn = gimple_call_internal_fn (def_call);
4131 int mask_index = internal_fn_mask_index (ifn);
4132 return gimple_call_arg (def_call, mask_index);
4135 gcc_unreachable ();
4138 /* Return the scalar offset type that an internal gather/scatter function
4139 should use. GS_INFO describes the gather/scatter operation. */
4141 static tree
4142 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4144 tree offset_type = TREE_TYPE (gs_info->offset);
4145 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4147 /* Enforced by vect_check_gather_scatter. */
4148 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4149 gcc_assert (element_bits >= offset_bits);
4151 /* If the offset is narrower than the elements, extend it according
4152 to its sign. */
4153 if (element_bits > offset_bits)
4154 return build_nonstandard_integer_type (element_bits,
4155 TYPE_UNSIGNED (offset_type));
4157 return offset_type;
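/* Illustrative example (hypothetical types): for a gather of "double"
   elements (64 bits) whose recorded offset is a 32-bit signed "int", the
   function above returns a signed 64-bit integer type, so that the offset
   vector has the same number of bits per element as the data vector.  An
   offset that already matches the element width is returned unchanged.  */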
4160 /* Return MASK if MASK is suitable for masking an operation on vectors
4161 of type VECTYPE, otherwise convert it into such a form and return
4162 the result. Associate any conversion statements with STMT_INFO's
4163 pattern. */
4165 static tree
4166 vect_convert_mask_for_vectype (tree mask, tree vectype,
4167 stmt_vec_info stmt_info, vec_info *vinfo)
4169 tree mask_type = search_type_for_mask (mask, vinfo);
4170 if (mask_type)
4172 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4173 if (mask_vectype
4174 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4175 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4176 mask = build_mask_conversion (mask, vectype, stmt_info);
4178 return mask;
4181 /* Return the equivalent of:
4183 fold_convert (TYPE, VALUE)
4185 with the expectation that the operation will be vectorized.
4186 If new statements are needed, add them as pattern statements
4187 to STMT_INFO. */
4189 static tree
4190 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4192 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4193 return value;
4195 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4196 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4197 append_pattern_def_seq (stmt_info, conversion,
4198 get_vectype_for_scalar_type (type));
4199 return new_value;
4202 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4203 internal function. Return the final statement on success and set
4204 *TYPE_OUT to the vector type being loaded or stored.
4206 This function only handles gathers and scatters that were recognized
4207 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4209 static gimple *
4210 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4212 /* Currently we only support this for loop vectorization. */
4213 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4214 if (!loop_vinfo)
4215 return NULL;
4217 /* Make sure that we're looking at a gather load or scatter store. */
4218 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4219 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4220 return NULL;
4222 /* Get the boolean that controls whether the load or store happens.
4223 This is null if the operation is unconditional. */
4224 tree mask = vect_get_load_store_mask (stmt_info);
4226 /* Make sure that the target supports an appropriate internal
4227 function for the gather/scatter operation. */
4228 gather_scatter_info gs_info;
4229 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4230 || gs_info.decl)
4231 return NULL;
4233 /* Convert the mask to the right form. */
4234 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4235 if (mask)
4236 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4237 loop_vinfo);
4239 /* Get the invariant base and non-invariant offset, converting the
4240 latter to the same width as the vector elements. */
4241 tree base = gs_info.base;
4242 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4243 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4244 stmt_info);
4246 /* Build the new pattern statement. */
4247 tree scale = size_int (gs_info.scale);
4248 gcall *pattern_stmt;
4249 if (DR_IS_READ (dr))
4251 if (mask != NULL)
4252 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4253 offset, scale, mask);
4254 else
4255 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4256 offset, scale);
4257 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4258 gimple_call_set_lhs (pattern_stmt, load_lhs);
4260 else
4262 tree rhs = vect_get_store_rhs (stmt_info);
4263 if (mask != NULL)
4264 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4265 base, offset, scale, rhs,
4266 mask);
4267 else
4268 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4269 base, offset, scale, rhs);
4271 gimple_call_set_nothrow (pattern_stmt, true);
4273 /* Copy across relevant vectorization info and associate DR with the
4274 new pattern statement instead of the original statement. */
4275 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4276 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4278 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4279 *type_out = vectype;
4280 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4282 return pattern_stmt;
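/* Illustrative sketch of the transform above (names hypothetical).
   A recognized unconditional gather load such as

     x_1 = a[b[i_2]];

   becomes a call of the internal function recorded in gs_info.ifn,
   roughly

     patt_3 = .IFN (base, offset, scale);

   while a conditional scatter store under mask m_4 becomes

     .MASK_SCATTER_STORE (base, offset, scale, rhs, m_4);

   with the offset first widened to the element width via
   vect_add_conversion_to_pattern.  */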
4285 /* Return true if TYPE is a non-boolean integer type. These are the types
4286 that we want to consider for narrowing. */
4288 static bool
4289 vect_narrowable_type_p (tree type)
4291 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4294 /* Return true if the operation given by CODE can be truncated to N bits
4295 when only N bits of the output are needed. This is only true if bit N+1
4296 of the inputs has no effect on the low N bits of the result. */
4298 static bool
4299 vect_truncatable_operation_p (tree_code code)
4301 switch (code)
4303 case PLUS_EXPR:
4304 case MINUS_EXPR:
4305 case MULT_EXPR:
4306 case BIT_AND_EXPR:
4307 case BIT_IOR_EXPR:
4308 case BIT_XOR_EXPR:
4309 case COND_EXPR:
4310 return true;
4312 default:
4313 return false;
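/* Worked example (for illustration only): addition is truncatable because
   carries only propagate upwards, e.g. for unsigned A and B

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   so the low 8 bits of the result depend only on the low 8 bits of the
   inputs.  A right shift is not truncatable: in "a >> 1", bit 8 of A
   contributes to bit 7 of the result, which is why
   vect_determine_precisions_from_users adds CONST_SHIFT extra bits for
   RSHIFT_EXPR below.  */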
4317 /* Record that STMT_INFO could be changed from operating on TYPE to
4318 operating on a type with the precision and sign given by PRECISION
4319 and SIGN respectively. PRECISION is an arbitrary bit precision;
4320 it might not be a whole number of bytes. */
4322 static void
4323 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4324 unsigned int precision, signop sign)
4326 /* Round the precision up to a whole number of bytes. */
4327 precision = vect_element_precision (precision);
4328 if (precision < TYPE_PRECISION (type)
4329 && (!stmt_info->operation_precision
4330 || stmt_info->operation_precision > precision))
4332 stmt_info->operation_precision = precision;
4333 stmt_info->operation_sign = sign;
4337 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4338 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4339 is an arbitrary bit precision; it might not be a whole number of bytes. */
4341 static void
4342 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4343 unsigned int min_input_precision)
4345 /* This operation in isolation only requires the inputs to have
4346 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4347 that MIN_INPUT_PRECISION is a natural precision for the chain
4348 as a whole. E.g. consider something like:
4350 unsigned short *x, *y;
4351 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4353 The right shift can be done on unsigned chars, and only requires the
4354 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4355 approach would mean turning a natural chain of single-vector unsigned
4356 short operations into one that truncates "*x" and then extends
4357 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4358 operation and one vector for each unsigned char operation.
4359 This would be a significant pessimization.
4361 Instead only propagate the maximum of this precision and the precision
4362 required by the users of the result. This means that we don't pessimize
4363 the case above but continue to optimize things like:
4365 unsigned char *y;
4366 unsigned short *x;
4367 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4369 Here we would truncate two vectors of *x to a single vector of
4370 unsigned chars and use single-vector unsigned char operations for
4371 everything else, rather than doing two unsigned short copies of
4372 "(*x & 0xf0) >> 4" and then truncating the result. */
4373 min_input_precision = MAX (min_input_precision,
4374 stmt_info->min_output_precision);
4376 if (min_input_precision < TYPE_PRECISION (type)
4377 && (!stmt_info->min_input_precision
4378 || stmt_info->min_input_precision > min_input_precision))
4379 stmt_info->min_input_precision = min_input_precision;
4382 /* Subroutine of vect_determine_min_output_precision. Return true if
4383 we can calculate a reduced number of output bits for STMT_INFO,
4384 whose result is LHS. */
4386 static bool
4387 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4389 /* Take the maximum precision required by users of the result. */
4390 vec_info *vinfo = stmt_info->vinfo;
4391 unsigned int precision = 0;
4392 imm_use_iterator iter;
4393 use_operand_p use;
4394 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4396 gimple *use_stmt = USE_STMT (use);
4397 if (is_gimple_debug (use_stmt))
4398 continue;
4399 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4400 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4401 return false;
4402 precision = MAX (precision, use_stmt_info->min_input_precision);
4405 if (dump_enabled_p ())
4407 dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
4408 precision);
4409 dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
4410 dump_printf (MSG_NOTE, " are significant\n");
4412 stmt_info->min_output_precision = precision;
4413 return true;
4416 /* Calculate min_output_precision for STMT_INFO. */
4418 static void
4419 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4421 /* We're only interested in statements with a narrowable result. */
4422 tree lhs = gimple_get_lhs (stmt_info->stmt);
4423 if (!lhs
4424 || TREE_CODE (lhs) != SSA_NAME
4425 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4426 return;
4428 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4429 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4432 /* Use range information to decide whether STMT (described by STMT_INFO)
4433 could be done in a narrower type. This is effectively a forward
4434 propagation, since it uses context-independent information that applies
4435 to all users of an SSA name. */
4437 static void
4438 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4440 tree lhs = gimple_assign_lhs (stmt);
4441 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4442 return;
4444 tree type = TREE_TYPE (lhs);
4445 if (!vect_narrowable_type_p (type))
4446 return;
4448 /* First see whether we have any useful range information for the result. */
4449 unsigned int precision = TYPE_PRECISION (type);
4450 signop sign = TYPE_SIGN (type);
4451 wide_int min_value, max_value;
4452 if (!vect_get_range_info (lhs, &min_value, &max_value))
4453 return;
4455 tree_code code = gimple_assign_rhs_code (stmt);
4456 unsigned int nops = gimple_num_ops (stmt);
4458 if (!vect_truncatable_operation_p (code))
4459 /* Check that all relevant input operands are compatible, and update
4460 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4461 for (unsigned int i = 1; i < nops; ++i)
4463 tree op = gimple_op (stmt, i);
4464 if (TREE_CODE (op) == INTEGER_CST)
4466 /* Don't require the integer to have RHS_TYPE (which it might
4467 not for things like shift amounts, etc.), but do require it
4468 to fit the type. */
4469 if (!int_fits_type_p (op, type))
4470 return;
4472 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4473 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4475 else if (TREE_CODE (op) == SSA_NAME)
4477 /* Ignore codes that don't take uniform arguments. */
4478 if (!types_compatible_p (TREE_TYPE (op), type))
4479 return;
4481 wide_int op_min_value, op_max_value;
4482 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4483 return;
4485 min_value = wi::min (min_value, op_min_value, sign);
4486 max_value = wi::max (max_value, op_max_value, sign);
4488 else
4489 return;
4492 /* Try to switch signed types for unsigned types if we can.
4493 This is better for two reasons. First, unsigned ops tend
4494 to be cheaper than signed ops. Second, it means that we can
4495 handle things like:
4497 signed char c;
4498 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4502 signed char c;
4503 unsigned short res_1 = (unsigned short) c & 0xff00;
4504 int res = (int) res_1;
4506 where the intermediate result res_1 has unsigned rather than
4507 signed type. */
4508 if (sign == SIGNED && !wi::neg_p (min_value))
4509 sign = UNSIGNED;
4511 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4512 unsigned int precision1 = wi::min_precision (min_value, sign);
4513 unsigned int precision2 = wi::min_precision (max_value, sign);
4514 unsigned int value_precision = MAX (precision1, precision2);
4515 if (value_precision >= precision)
4516 return;
4518 if (dump_enabled_p ())
4520 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4521 " without loss of precision: ",
4522 sign == SIGNED ? "signed" : "unsigned",
4523 value_precision);
4524 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4527 vect_set_operation_type (stmt_info, type, value_precision, sign);
4528 vect_set_min_input_precision (stmt_info, type, value_precision);
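/* Illustrative example (hypothetical ranges): if range information shows
   that the result of

     int res_1 = tmp_2 + 1;   where tmp_2 has range [0, 255]

   lies in [1, 256], then 9 unsigned bits are enough to represent it, so
   the statement is recorded as a candidate for unsigned arithmetic at
   that precision (rounded up to a whole number of bytes by
   vect_element_precision), without loss of precision.  */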
4531 /* Use information about the users of STMT's result to decide whether
4532 STMT (described by STMT_INFO) could be done in a narrower type.
4533 This is effectively a backward propagation. */
4535 static void
4536 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4538 tree_code code = gimple_assign_rhs_code (stmt);
4539 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4540 tree type = TREE_TYPE (gimple_op (stmt, opno));
4541 if (!vect_narrowable_type_p (type))
4542 return;
4544 unsigned int precision = TYPE_PRECISION (type);
4545 unsigned int operation_precision, min_input_precision;
4546 switch (code)
4548 CASE_CONVERT:
4549 /* Only the bits that contribute to the output matter. Don't change
4550 the precision of the operation itself. */
4551 operation_precision = precision;
4552 min_input_precision = stmt_info->min_output_precision;
4553 break;
4555 case LSHIFT_EXPR:
4556 case RSHIFT_EXPR:
4558 tree shift = gimple_assign_rhs2 (stmt);
4559 if (TREE_CODE (shift) != INTEGER_CST
4560 || !wi::ltu_p (wi::to_widest (shift), precision))
4561 return;
4562 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4563 if (code == LSHIFT_EXPR)
4565 /* We need CONST_SHIFT fewer bits of the input. */
4566 operation_precision = stmt_info->min_output_precision;
4567 min_input_precision = (MAX (operation_precision, const_shift)
4568 - const_shift);
4570 else
4572 /* We need CONST_SHIFT extra bits to do the operation. */
4573 operation_precision = (stmt_info->min_output_precision
4574 + const_shift);
4575 min_input_precision = operation_precision;
4577 break;
4580 default:
4581 if (vect_truncatable_operation_p (code))
4583 /* Input bit N has no effect on output bits N-1 and lower. */
4584 operation_precision = stmt_info->min_output_precision;
4585 min_input_precision = operation_precision;
4586 break;
4588 return;
4591 if (operation_precision < precision)
4593 if (dump_enabled_p ())
4595 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4596 " without affecting users: ",
4597 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4598 operation_precision);
4599 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4601 vect_set_operation_type (stmt_info, type, operation_precision,
4602 TYPE_SIGN (type));
4604 vect_set_min_input_precision (stmt_info, type, min_input_precision);
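/* Illustrative example (hypothetical precisions): if only the low 8 bits
   of

     res_1 = x_2 >> 3;

   are needed by res_1's users, the shift itself must be done in at least
   8 + 3 = 11 bits and x_2 must provide 11 significant bits; for
   "x_2 << 3" the input would instead need only 8 - 3 = 5 bits.  */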
4607 /* Handle vect_determine_precisions for STMT_INFO, given that we
4608 have already done so for the users of its result. */
4610 void
4611 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4613 vect_determine_min_output_precision (stmt_info);
4614 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4616 vect_determine_precisions_from_range (stmt_info, stmt);
4617 vect_determine_precisions_from_users (stmt_info, stmt);
4621 /* Walk backwards through the vectorizable region to determine the
4622 values of these fields:
4624 - min_output_precision
4625 - min_input_precision
4626 - operation_precision
4627 - operation_sign. */
4629 void
4630 vect_determine_precisions (vec_info *vinfo)
4632 DUMP_VECT_SCOPE ("vect_determine_precisions");
4634 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4636 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4637 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4638 unsigned int nbbs = loop->num_nodes;
4640 for (unsigned int i = 0; i < nbbs; i++)
4642 basic_block bb = bbs[nbbs - i - 1];
4643 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4644 !gsi_end_p (si); gsi_prev (&si))
4645 vect_determine_stmt_precisions
4646 (vinfo->lookup_stmt (gsi_stmt (si)));
4649 else
4651 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4652 gimple_stmt_iterator si = bb_vinfo->region_end;
4653 gimple *stmt;
4656 if (!gsi_stmt (si))
4657 si = gsi_last_bb (bb_vinfo->bb);
4658 else
4659 gsi_prev (&si);
4660 stmt = gsi_stmt (si);
4661 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4662 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4663 vect_determine_stmt_precisions (stmt_info);
4665 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4669 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4671 struct vect_recog_func
4673 vect_recog_func_ptr fn;
4674 const char *name;
4677 /* Note that ordering matters - the first pattern matching on a stmt is
4678 taken which means usually the more complex one needs to precede the
4679 less complex ones (widen_sum only after dot_prod or sad for example). */
4680 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4681 { vect_recog_over_widening_pattern, "over_widening" },
4682 /* Must come after over_widening, which narrows the shift as much as
4683 possible beforehand. */
4684 { vect_recog_average_pattern, "average" },
4685 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4686 { vect_recog_widen_mult_pattern, "widen_mult" },
4687 { vect_recog_dot_prod_pattern, "dot_prod" },
4688 { vect_recog_sad_pattern, "sad" },
4689 { vect_recog_widen_sum_pattern, "widen_sum" },
4690 { vect_recog_pow_pattern, "pow" },
4691 { vect_recog_widen_shift_pattern, "widen_shift" },
4692 { vect_recog_rotate_pattern, "rotate" },
4693 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4694 { vect_recog_divmod_pattern, "divmod" },
4695 { vect_recog_mult_pattern, "mult" },
4696 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4697 { vect_recog_bool_pattern, "bool" },
4698 /* This must come before mask conversion, and includes the parts
4699 of mask conversion that are needed for gather and scatter
4700 internal functions. */
4701 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4702 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4705 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4707 /* Mark statements that are involved in a pattern. */
4709 static inline void
4710 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4711 tree pattern_vectype)
4713 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4715 gimple *orig_pattern_stmt = NULL;
4716 if (is_pattern_stmt_p (orig_stmt_info))
4718 /* We're replacing a statement in an existing pattern definition
4719 sequence. */
4720 orig_pattern_stmt = orig_stmt_info->stmt;
4721 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_NOTE, vect_location,
4724 "replacing earlier pattern ");
4725 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_pattern_stmt, 0);
4728 /* To keep the book-keeping simple, just swap the lhs of the
4729 old and new statements, so that the old one has a valid but
4730 unused lhs. */
4731 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4732 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4733 gimple_set_lhs (pattern_stmt, old_lhs);
4735 if (dump_enabled_p ())
4737 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4738 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4741 /* Switch to the statement that ORIG replaces. */
4742 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4744 /* We shouldn't be replacing the main pattern statement. */
4745 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4746 != orig_pattern_stmt);
4749 if (def_seq)
4750 for (gimple_stmt_iterator si = gsi_start (def_seq);
4751 !gsi_end_p (si); gsi_next (&si))
4752 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4754 if (orig_pattern_stmt)
4756 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4758 /* Insert all the new pattern statements before the original one. */
4759 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4760 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4761 orig_def_seq);
4762 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4763 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4765 /* Remove the pattern statement that this new pattern replaces. */
4766 gsi_remove (&gsi, false);
4768 else
4769 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4772 /* Function vect_pattern_recog_1
4774 Input:
4775 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4776 computation pattern.
4777 STMT_INFO: A stmt from which the pattern search should start.
4779 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4780 a sequence of statements that has the same functionality and can be
4781 used to replace STMT_INFO. It returns the last statement in the sequence
4782 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4783 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4784 statement, having first checked that the target supports the new operation
4785 in that type.
4787 This function also does some bookkeeping, as explained in the documentation
4788 for vect_pattern_recog. */
4790 static void
4791 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4793 vec_info *vinfo = stmt_info->vinfo;
4794 gimple *pattern_stmt;
4795 loop_vec_info loop_vinfo;
4796 tree pattern_vectype;
4798 /* If this statement has already been replaced with pattern statements,
4799 leave the original statement alone, since the first match wins.
4800 Instead try to match against the definition statements that feed
4801 the main pattern statement. */
4802 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4804 gimple_stmt_iterator gsi;
4805 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4806 !gsi_end_p (gsi); gsi_next (&gsi))
4807 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4808 return;
4811 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4812 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4813 if (!pattern_stmt)
4815 /* Clear any half-formed pattern definition sequence. */
4816 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4817 return;
4820 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4821 gcc_assert (pattern_vectype);
4823 /* Found a vectorizable pattern. */
4824 if (dump_enabled_p ())
4826 dump_printf_loc (MSG_NOTE, vect_location,
4827 "%s pattern recognized: ", recog_func->name);
4828 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4831 /* Mark the stmts that are involved in the pattern. */
4832 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4834 /* Patterns cannot be vectorized using SLP, because they change the order of
4835 computation. */
4836 if (loop_vinfo)
4838 unsigned ix, ix2;
4839 stmt_vec_info *elem_ptr;
4840 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4841 elem_ptr, *elem_ptr == stmt_info);
4846 /* Function vect_pattern_recog
4848 Input:
4849 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4850 computation idioms.
4852 Output - for each computation idiom that is detected we create a new stmt
4853 that provides the same functionality and that can be vectorized. We
4854 also record some information in the struct_stmt_info of the relevant
4855 stmts, as explained below:
4857 At the entry to this function we have the following stmts, with the
4858 following initial value in the STMT_VINFO fields:
4860 stmt in_pattern_p related_stmt vec_stmt
4861 S1: a_i = .... - - -
4862 S2: a_2 = ..use(a_i).. - - -
4863 S3: a_1 = ..use(a_2).. - - -
4864 S4: a_0 = ..use(a_1).. - - -
4865 S5: ... = ..use(a_0).. - - -
4867 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4868 represented by a single stmt. We then:
4869 - create a new stmt S6 equivalent to the pattern (the stmt is not
4870 inserted into the code)
4871 - fill in the STMT_VINFO fields as follows:
4873 in_pattern_p related_stmt vec_stmt
4874 S1: a_i = .... - - -
4875 S2: a_2 = ..use(a_i).. - - -
4876 S3: a_1 = ..use(a_2).. - - -
4877 S4: a_0 = ..use(a_1).. true S6 -
4878 '---> S6: a_new = .... - S4 -
4879 S5: ... = ..use(a_0).. - - -
4881 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4882 to each other through the RELATED_STMT field).
4884 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4885 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4886 remain irrelevant unless used by stmts other than S4.
4888 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4889 (because they are marked as irrelevant). It will vectorize S6, and record
4890 a pointer to the new vector stmt VS6 from S6 (as usual).
4891 S4 will be skipped, and S5 will be vectorized as usual:
4893 in_pattern_p related_stmt vec_stmt
4894 S1: a_i = .... - - -
4895 S2: a_2 = ..use(a_i).. - - -
4896 S3: a_1 = ..use(a_2).. - - -
4897 > VS6: va_new = .... - - -
4898 S4: a_0 = ..use(a_1).. true S6 VS6
4899 '---> S6: a_new = .... - S4 VS6
4900 > VS5: ... = ..vuse(va_new).. - - -
4901 S5: ... = ..use(a_0).. - - -
4903 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4904 elsewhere), and we'll end up with:
4906 VS6: va_new = ....
4907 VS5: ... = ..vuse(va_new)..
4909 In case of more than one pattern statements, e.g., widen-mult with
4910 intermediate type:
4912 S1 a_t = ;
4913 S2 a_T = (TYPE) a_t;
4914 '--> S3: a_it = (interm_type) a_t;
4915 S4 prod_T = a_T * CONST;
4916 '--> S5: prod_T' = a_it w* CONST;
4918 there may be other users of a_T outside the pattern. In that case S2 will
4919 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4920 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4921 be recorded in S3. */
4923 void
4924 vect_pattern_recog (vec_info *vinfo)
4926 struct loop *loop;
4927 basic_block *bbs;
4928 unsigned int nbbs;
4929 gimple_stmt_iterator si;
4930 unsigned int i, j;
4932 vect_determine_precisions (vinfo);
4934 DUMP_VECT_SCOPE ("vect_pattern_recog");
4936 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4938 loop = LOOP_VINFO_LOOP (loop_vinfo);
4939 bbs = LOOP_VINFO_BBS (loop_vinfo);
4940 nbbs = loop->num_nodes;
4942 /* Scan through the loop stmts, applying the pattern recognition
4943 functions starting at each stmt visited: */
4944 for (i = 0; i < nbbs; i++)
4946 basic_block bb = bbs[i];
4947 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4949 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4950 /* Scan over all generic vect_recog_xxx_pattern functions. */
4951 for (j = 0; j < NUM_PATTERNS; j++)
4952 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4953 stmt_info);
4957 else
4959 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4960 for (si = bb_vinfo->region_begin;
4961 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4963 gimple *stmt = gsi_stmt (si);
4964 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4965 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4966 continue;
4968 /* Scan over all generic vect_recog_xxx_pattern functions. */
4969 for (j = 0; j < NUM_PATTERNS; j++)
4970 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);