gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
56 value_range_kind vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
71 return true;
73 else
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
80 return false;
84 /* Report that we've found an instance of pattern PATTERN in
85 statement STMT. */
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
90 if (dump_enabled_p ())
91 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
94 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
95 return the pattern statement's stmt_vec_info. Set its vector type to
96 VECTYPE if it doesn't have one already. */
98 static stmt_vec_info
99 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
100 tree vectype)
102 vec_info *vinfo = orig_stmt_info->vinfo;
103 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
104 if (pattern_stmt_info == NULL)
105 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
106 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
108 pattern_stmt_info->pattern_stmt_p = true;
109 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
110 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
111 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
112 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
113 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
114 return pattern_stmt_info;
117 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
118 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
119 have one already. */
121 static void
122 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
123 tree vectype)
125 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
126 STMT_VINFO_RELATED_STMT (orig_stmt_info)
127 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
130 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
131 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
132 be different from the vector type of the final pattern statement. */
134 static inline void
135 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
136 tree vectype = NULL_TREE)
138 vec_info *vinfo = stmt_info->vinfo;
139 if (vectype)
141 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
142 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
144 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
145 new_stmt);
148 /* The caller wants to perform new operations on vect_external variable
149 VAR, so that the result of the operations would also be vect_external.
150 Return the edge on which the operations can be performed, if one exists.
151 Return null if the operations should instead be treated as part of
152 the pattern that needs them. */
154 static edge
155 vect_get_external_def_edge (vec_info *vinfo, tree var)
157 edge e = NULL;
158 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
160 e = loop_preheader_edge (loop_vinfo->loop);
161 if (!SSA_NAME_IS_DEFAULT_DEF (var))
163 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
164 if (bb == NULL
165 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
166 e = NULL;
169 return e;
172 /* Return true if the target supports a vector version of CODE,
173 where CODE is known to map to a direct optab. ITYPE specifies
174 the type of (some of) the scalar inputs and OTYPE specifies the
175 type of the scalar result.
177 If CODE allows the inputs and outputs to have different type
178 (such as for WIDEN_SUM_EXPR), it is the input mode rather
179 than the output mode that determines the appropriate target pattern.
180 Operand 0 of the target pattern then specifies the mode that the output
181 must have.
183 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
184 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
185 is nonnull. */
187 static bool
188 vect_supportable_direct_optab_p (tree otype, tree_code code,
189 tree itype, tree *vecotype_out,
190 tree *vecitype_out = NULL)
192 tree vecitype = get_vectype_for_scalar_type (itype);
193 if (!vecitype)
194 return false;
196 tree vecotype = get_vectype_for_scalar_type (otype);
197 if (!vecotype)
198 return false;
200 optab optab = optab_for_tree_code (code, vecitype, optab_default);
201 if (!optab)
202 return false;
204 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
205 if (icode == CODE_FOR_nothing
206 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
207 return false;
209 *vecotype_out = vecotype;
210 if (vecitype_out)
211 *vecitype_out = vecitype;
212 return true;
215 /* Round bit precision PRECISION up to a full element. */
217 static unsigned int
218 vect_element_precision (unsigned int precision)
220 precision = 1 << ceil_log2 (precision);
221 return MAX (precision, BITS_PER_UNIT);
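/* For example, a precision of 12 rounds up to 16 and 17 rounds up to 32,
   while anything of BITS_PER_UNIT bits or fewer rounds up to BITS_PER_UNIT
   (so 3 becomes 8 on the usual 8-bit-unit targets).  */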
224 /* If OP is defined by a statement that's being considered for vectorization,
225 return information about that statement, otherwise return NULL. */
227 static stmt_vec_info
228 vect_get_internal_def (vec_info *vinfo, tree op)
230 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
231 if (def_stmt_info
232 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
233 return def_stmt_info;
234 return NULL;
237 /* Check whether NAME, an ssa-name used in STMT_VINFO,
238 is a result of a type promotion, such that:
239 DEF_STMT: NAME = NOP (name0)
240 If CHECK_SIGN is TRUE, check that either both types are signed or both are
241 unsigned. */
243 static bool
244 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
245 tree *orig_type, gimple **def_stmt, bool *promotion)
247 tree type = TREE_TYPE (name);
248 tree oprnd0;
249 enum vect_def_type dt;
251 stmt_vec_info def_stmt_info;
252 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
253 def_stmt))
254 return false;
256 if (dt != vect_internal_def
257 && dt != vect_external_def && dt != vect_constant_def)
258 return false;
260 if (!*def_stmt)
261 return false;
263 if (!is_gimple_assign (*def_stmt))
264 return false;
266 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
267 return false;
269 oprnd0 = gimple_assign_rhs1 (*def_stmt);
271 *orig_type = TREE_TYPE (oprnd0);
272 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
273 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
274 return false;
276 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
277 *promotion = true;
278 else
279 *promotion = false;
281 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
282 return false;
284 return true;
287 /* Holds information about an input operand after some sign changes
288 and type promotions have been peeled away. */
289 struct vect_unpromoted_value {
290 vect_unpromoted_value ();
292 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
294 /* The value obtained after peeling away zero or more casts. */
295 tree op;
297 /* The type of OP. */
298 tree type;
300 /* The definition type of OP. */
301 vect_def_type dt;
303 /* If OP is the result of peeling at least one cast, and if the cast
304 of OP itself is a vectorizable statement, CASTER identifies that
305 statement, otherwise it is null. */
306 stmt_vec_info caster;
309 inline vect_unpromoted_value::vect_unpromoted_value ()
310 : op (NULL_TREE),
311 type (NULL_TREE),
312 dt (vect_uninitialized_def),
313 caster (NULL)
317 /* Set the operand to OP_IN, its definition type to DT_IN, and the
318 statement that casts it to CASTER_IN. */
320 inline void
321 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
322 stmt_vec_info caster_in)
324 op = op_in;
325 type = TREE_TYPE (op);
326 dt = dt_in;
327 caster = caster_in;
330 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
331 to reach some vectorizable inner operand OP', continuing as long as it
332 is possible to convert OP' back to OP using a possible sign change
333 followed by a possible promotion P. Return this OP', or null if OP is
334 not a vectorizable SSA name. If there is a promotion P, describe its
335 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
336 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
337 have more than one user.
339 A successful return means that it is possible to go from OP' to OP
340 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
341 whereas the cast from UNPROM to OP might be a promotion, a sign
342 change, or a nop.
344 E.g. say we have:
346 signed short *ptr = ...;
347 signed short C = *ptr;
348 unsigned short B = (unsigned short) C; // sign change
349 signed int A = (signed int) B; // unsigned promotion
350 ...possible other uses of A...
351 unsigned int OP = (unsigned int) A; // sign change
353 In this case it's possible to go directly from C to OP using:
355 OP = (unsigned int) (unsigned short) C;
356 +------------+ +--------------+
357 promotion sign change
359 so OP' would be C. The input to the promotion is B, so UNPROM
360 would describe B. */
362 static tree
363 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
364 vect_unpromoted_value *unprom,
365 bool *single_use_p = NULL)
367 tree res = NULL_TREE;
368 tree op_type = TREE_TYPE (op);
369 unsigned int orig_precision = TYPE_PRECISION (op_type);
370 unsigned int min_precision = orig_precision;
371 stmt_vec_info caster = NULL;
372 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
374 /* See whether OP is simple enough to vectorize. */
375 stmt_vec_info def_stmt_info;
376 gimple *def_stmt;
377 vect_def_type dt;
378 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
379 break;
381 /* If OP is the input of a demotion, skip over it to see whether
382 OP is itself the result of a promotion. If so, the combined
383 effect of the promotion and the demotion might fit the required
384 pattern, otherwise neither operation fits.
386 This copes with cases such as the result of an arithmetic
387 operation being truncated before being stored, and where that
388 arithmetic operation has been recognized as an over-widened one. */
389 if (TYPE_PRECISION (op_type) <= min_precision)
391 /* Use OP as the UNPROM described above if we haven't yet
392 found a promotion, or if using the new input preserves the
393 sign of the previous promotion. */
394 if (!res
395 || TYPE_PRECISION (unprom->type) == orig_precision
396 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
398 unprom->set_op (op, dt, caster);
399 min_precision = TYPE_PRECISION (op_type);
401 /* Stop if we've already seen a promotion and if this
402 conversion does more than change the sign. */
403 else if (TYPE_PRECISION (op_type)
404 != TYPE_PRECISION (unprom->type))
405 break;
407 /* The sequence now extends to OP. */
408 res = op;
411 /* See whether OP is defined by a cast. Record it as CASTER if
412 the cast is potentially vectorizable. */
413 if (!def_stmt)
414 break;
415 caster = def_stmt_info;
417 /* Ignore pattern statements, since we don't link uses for them. */
418 if (caster
419 && single_use_p
420 && !STMT_VINFO_RELATED_STMT (caster)
421 && !has_single_use (res))
422 *single_use_p = false;
424 gassign *assign = dyn_cast <gassign *> (def_stmt);
425 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
426 break;
428 /* Continue with the input to the cast. */
429 op = gimple_assign_rhs1 (def_stmt);
430 op_type = TREE_TYPE (op);
432 return res;
435 /* OP is an integer operand to an operation that returns TYPE, and we
436 want to treat the operation as a widening one. So far we can treat
437 it as widening from *COMMON_TYPE.
439 Return true if OP is suitable for such a widening operation,
440 either widening from *COMMON_TYPE or from some supertype of it.
441 Update *COMMON_TYPE to the supertype in the latter case.
443 SHIFT_P is true if OP is a shift amount. */
445 static bool
446 vect_joust_widened_integer (tree type, bool shift_p, tree op,
447 tree *common_type)
449 /* Calculate the minimum precision required by OP, without changing
450 the sign of either operand. */
451 unsigned int precision;
452 if (shift_p)
454 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
455 return false;
456 precision = TREE_INT_CST_LOW (op);
458 else
460 precision = wi::min_precision (wi::to_widest (op),
461 TYPE_SIGN (*common_type));
462 if (precision * 2 > TYPE_PRECISION (type))
463 return false;
466 /* If OP requires a wider type, switch to that type. The checks
467 above ensure that this is still narrower than the result. */
468 precision = vect_element_precision (precision);
469 if (TYPE_PRECISION (*common_type) < precision)
470 *common_type = build_nonstandard_integer_type
471 (precision, TYPE_UNSIGNED (*common_type));
472 return true;
475 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
476 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
478 static bool
479 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
481 if (types_compatible_p (*common_type, new_type))
482 return true;
484 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
485 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
486 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
487 return true;
489 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
490 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
491 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
493 *common_type = new_type;
494 return true;
497 /* We have mismatched signs, with the signed type being
498 no wider than the unsigned type. In this case we need
499 a wider signed type. */
500 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
501 TYPE_PRECISION (new_type));
502 precision *= 2;
503 if (precision * 2 > TYPE_PRECISION (type))
504 return false;
506 *common_type = build_nonstandard_integer_type (precision, false);
507 return true;
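/* For example, if *COMMON_TYPE is unsigned char, NEW_TYPE is signed char
   and TYPE is a 32-bit int, neither 8-bit type can hold all values of the
   other, so the function falls back to a signed 16-bit *COMMON_TYPE; that
   is still no more than half the width of TYPE, so it returns true.  */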
510 /* Check whether STMT_INFO can be viewed as a tree of integer operations
511 in which each node either performs CODE or WIDENED_CODE, and where
512 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
513 specifies the maximum number of leaf operands. SHIFT_P says whether
514 CODE and WIDENED_CODE are some sort of shift.
516 If STMT_INFO is such a tree, return the number of leaf operands
517 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
518 to a type that (a) is narrower than the result of STMT_INFO and
519 (b) can hold all leaf operand values.
521 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
522 exists. */
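/* As an illustration, a call with CODE = WIDENED_CODE = PLUS_EXPR and
   MAX_NOPS = 3 (as vect_recog_average_pattern uses it) applied to the
   statement computing t2 in

     short a, b, c;
     int t1 = (int) a + (int) b;
     int t2 = t1 + (int) c;

   would return 3, describing the leaves a, b and c in UNPROM[0..2] and
   setting *COMMON_TYPE to their common 16-bit type.  */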
524 static unsigned int
525 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
526 tree_code widened_code, bool shift_p,
527 unsigned int max_nops,
528 vect_unpromoted_value *unprom, tree *common_type)
530 /* Check for an integer operation with the right code. */
531 vec_info *vinfo = stmt_info->vinfo;
532 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
533 if (!assign)
534 return 0;
536 tree_code rhs_code = gimple_assign_rhs_code (assign);
537 if (rhs_code != code && rhs_code != widened_code)
538 return 0;
540 tree type = gimple_expr_type (assign);
541 if (!INTEGRAL_TYPE_P (type))
542 return 0;
544 /* Assume that both operands will be leaf operands. */
545 max_nops -= 2;
547 /* Check the operands. */
548 unsigned int next_op = 0;
549 for (unsigned int i = 0; i < 2; ++i)
551 vect_unpromoted_value *this_unprom = &unprom[next_op];
552 unsigned int nops = 1;
553 tree op = gimple_op (assign, i + 1);
554 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
556 /* We already have a common type from earlier operands.
557 Update it to account for OP. */
558 this_unprom->set_op (op, vect_constant_def);
559 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
560 return 0;
562 else
564 /* Only allow shifts by constants. */
565 if (shift_p && i == 1)
566 return 0;
568 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
569 this_unprom))
570 return 0;
572 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
574 /* The operand isn't widened. If STMT_INFO has the code
575 for an unwidened operation, recursively check whether
576 this operand is a node of the tree. */
577 if (rhs_code != code
578 || max_nops == 0
579 || this_unprom->dt != vect_internal_def)
580 return 0;
582 /* Give back the leaf slot allocated above now that we're
583 not treating this as a leaf operand. */
584 max_nops += 1;
586 /* Recursively process the definition of the operand. */
587 stmt_vec_info def_stmt_info
588 = vinfo->lookup_def (this_unprom->op);
589 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
590 shift_p, max_nops, this_unprom,
591 common_type);
592 if (nops == 0)
593 return 0;
595 max_nops -= nops;
597 else
599 /* Make sure that the operand is narrower than the result. */
600 if (TYPE_PRECISION (this_unprom->type) * 2
601 > TYPE_PRECISION (type))
602 return 0;
604 /* Update COMMON_TYPE for the new operand. */
605 if (i == 0)
606 *common_type = this_unprom->type;
607 else if (!vect_joust_widened_type (type, this_unprom->type,
608 common_type))
609 return 0;
612 next_op += nops;
614 return next_op;
617 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
618 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
620 static tree
621 vect_recog_temp_ssa_var (tree type, gimple *stmt)
623 return make_temp_ssa_name (type, stmt, "patt");
626 /* STMT2_INFO describes a type conversion that could be split into STMT1
627 followed by a version of STMT2_INFO that takes NEW_RHS as its first
628 input. Try to do this using pattern statements, returning true on
629 success. */
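/* For example, if STMT2_INFO is the widening conversion "a = (int) c",
   with c of type signed char, and the caller has built STMT1 as
   "patt = (short) c", then passing NEW_RHS = patt makes STMT2_INFO in
   effect "a = (int) patt", so the original cast is done in two steps.  */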
631 static bool
632 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
633 gimple *stmt1, tree vectype)
635 if (is_pattern_stmt_p (stmt2_info))
637 /* STMT2_INFO is part of a pattern. Get the statement to which
638 the pattern is attached. */
639 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
640 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "Splitting pattern statement: %G", stmt2_info->stmt);
646 /* Since STMT2_INFO is a pattern statement, we can change it
647 in-situ without worrying about changing the code for the
648 containing block. */
649 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
651 if (dump_enabled_p ())
653 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
654 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
655 stmt2_info->stmt);
658 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
659 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
660 /* STMT2_INFO is the actual pattern statement. Add STMT1
661 to the end of the definition sequence. */
662 gimple_seq_add_stmt_without_update (def_seq, stmt1);
663 else
665 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
666 before it. */
667 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
668 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
670 return true;
672 else
674 /* STMT2_INFO doesn't yet have a pattern. Try to create a
675 two-statement pattern now. */
676 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
677 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
678 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
679 if (!lhs_vectype)
680 return false;
682 if (dump_enabled_p ())
683 dump_printf_loc (MSG_NOTE, vect_location,
684 "Splitting statement: %G", stmt2_info->stmt);
686 /* Add STMT1 as a singleton pattern definition sequence. */
687 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
688 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
689 gimple_seq_add_stmt_without_update (def_seq, stmt1);
691 /* Build the second of the two pattern statements. */
692 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
693 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
694 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
696 if (dump_enabled_p ())
698 dump_printf_loc (MSG_NOTE, vect_location,
699 "into pattern statements: %G", stmt1);
700 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
703 return true;
707 /* Convert UNPROM to TYPE and return the result, adding new statements
708 to STMT_INFO's pattern definition statements if no better way is
709 available. VECTYPE is the vector form of TYPE. */
711 static tree
712 vect_convert_input (stmt_vec_info stmt_info, tree type,
713 vect_unpromoted_value *unprom, tree vectype)
715 /* Check for a no-op conversion. */
716 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
717 return unprom->op;
719 /* Allow the caller to create constant vect_unpromoted_values. */
720 if (TREE_CODE (unprom->op) == INTEGER_CST)
721 return wide_int_to_tree (type, wi::to_widest (unprom->op));
723 /* See if we can reuse an existing result. */
724 if (unprom->caster)
726 tree lhs = gimple_get_lhs (unprom->caster->stmt);
727 if (types_compatible_p (TREE_TYPE (lhs), type))
728 return lhs;
731 /* We need a new conversion statement. */
732 tree new_op = vect_recog_temp_ssa_var (type, NULL);
733 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
735 /* If the operation is the input to a vectorizable cast, try splitting
736 that cast into two, taking the required result as a mid-way point. */
737 if (unprom->caster)
739 tree lhs = gimple_get_lhs (unprom->caster->stmt);
740 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
741 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
742 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
743 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
744 return new_op;
747 /* If OP is an external value, see if we can insert the new statement
748 on an incoming edge. */
749 if (unprom->dt == vect_external_def)
750 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
752 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
753 gcc_assert (!new_bb);
754 return new_op;
757 /* As a (common) last resort, add the statement to the pattern itself. */
758 append_pattern_def_seq (stmt_info, new_stmt, vectype);
759 return new_op;
762 /* Invoke vect_convert_input for N elements of UNPROM and store the
763 result in the corresponding elements of RESULT. */
765 static void
766 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
767 tree *result, tree type, vect_unpromoted_value *unprom,
768 tree vectype)
770 for (unsigned int i = 0; i < n; ++i)
772 unsigned int j;
773 for (j = 0; j < i; ++j)
774 if (unprom[j].op == unprom[i].op)
775 break;
776 if (j < i)
777 result[i] = result[j];
778 else
779 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
783 /* The caller has created a (possibly empty) sequence of pattern definition
784 statements followed by a single statement PATTERN_STMT. Cast the result
785 of this final statement to TYPE. If a new statement is needed, add
786 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
787 and return the new statement, otherwise return PATTERN_STMT as-is.
788 VECITYPE is the vector form of PATTERN_STMT's result type. */
790 static gimple *
791 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
792 tree vecitype)
794 tree lhs = gimple_get_lhs (pattern_stmt);
795 if (!types_compatible_p (type, TREE_TYPE (lhs)))
797 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
798 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
799 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
801 return pattern_stmt;
804 /* Return true if STMT_VINFO describes a reduction for which reassociation
805 is allowed. If STMT_INFO is part of a group, assume that it's part of
806 a reduction chain and optimistically assume that all statements
807 except the last allow reassociation. */
809 static bool
810 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
812 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
813 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
814 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
817 /* As above, but also require it to have code CODE and to be a reduction
818 in the outermost loop. When returning true, store the operands in
819 *OP0_OUT and *OP1_OUT. */
821 static bool
822 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
823 tree *op0_out, tree *op1_out)
825 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
826 if (!loop_info)
827 return false;
829 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
830 if (!assign || gimple_assign_rhs_code (assign) != code)
831 return false;
833 /* We don't allow changing the order of the computation in the inner-loop
834 when doing outer-loop vectorization. */
835 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
836 if (loop && nested_in_vect_loop_p (loop, stmt_info))
837 return false;
839 if (!vect_reassociating_reduction_p (stmt_info))
840 return false;
842 *op0_out = gimple_assign_rhs1 (assign);
843 *op1_out = gimple_assign_rhs2 (assign);
844 return true;
847 /* Function vect_recog_dot_prod_pattern
849 Try to find the following pattern:
851 type x_t, y_t;
852 TYPE1 prod;
853 TYPE2 sum = init;
854 loop:
855 sum_0 = phi <init, sum_1>
856 S1 x_t = ...
857 S2 y_t = ...
858 S3 x_T = (TYPE1) x_t;
859 S4 y_T = (TYPE1) y_t;
860 S5 prod = x_T * y_T;
861 [S6 prod = (TYPE2) prod; #optional]
862 S7 sum_1 = prod + sum_0;
864 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
865 same size as 'TYPE1' or bigger. This is a special case of a reduction
866 computation.
868 Input:
870 * STMT_VINFO: The stmt from which the pattern search begins. In the
871 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
872 will be detected.
874 Output:
876 * TYPE_OUT: The type of the output of this pattern.
878 * Return value: A new stmt that will be used to replace the sequence of
879 stmts that constitute the pattern. In this case it will be:
880 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
882 Note: The dot-prod idiom is a widening reduction pattern that is
883 vectorized without preserving all the intermediate results. It
884 produces only N/2 (widened) results (by summing up pairs of
885 intermediate results) rather than all N results. Therefore, we
886 cannot allow this pattern when we want to get all the results and in
887 the correct order (as is the case when this computation is in an
888 inner-loop nested in an outer-loop that is being vectorized). */
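/* For example, on a target that supports DOT_PROD_EXPR for short inputs
   with an int accumulator, the loop

     short a[256], b[256];
     int sum = 0;
     for (int i = 0; i < 256; ++i)
       sum += a[i] * b[i];

   matches this pattern: the loads are S1/S2, the promotions to int are
   S3/S4, the multiplication is S5 and the accumulation is S7, and the
   whole sequence is replaced by DOT_PROD_EXPR <a[i], b[i], sum_0>.  */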
890 static gimple *
891 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
893 tree oprnd0, oprnd1;
894 gimple *last_stmt = stmt_vinfo->stmt;
895 vec_info *vinfo = stmt_vinfo->vinfo;
896 tree type, half_type;
897 gimple *pattern_stmt;
898 tree var;
900 /* Look for the following pattern
901 DX = (TYPE1) X;
902 DY = (TYPE1) Y;
903 DPROD = DX * DY;
904 DDPROD = (TYPE2) DPROD;
905 sum_1 = DDPROD + sum_0;
906 In which
907 - DX is double the size of X
908 - DY is double the size of Y
909 - DX, DY, DPROD all have the same type
910 - sum is the same size as DPROD or bigger
911 - sum has been recognized as a reduction variable.
913 This is equivalent to:
914 DPROD = X w* Y; #widen mult
915 sum_1 = DPROD w+ sum_0; #widen summation
917 DPROD = X w* Y; #widen mult
918 sum_1 = DPROD + sum_0; #summation
921 /* Starting from LAST_STMT, follow the defs of its uses in search
922 of the above pattern. */
924 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
925 &oprnd0, &oprnd1))
926 return NULL;
928 type = gimple_expr_type (last_stmt);
930 vect_unpromoted_value unprom_mult;
931 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
933 /* So far so good. Since last_stmt was detected as a (summation) reduction,
934 we know that oprnd1 is the reduction variable (defined by a loop-header
935 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
936 Left to check that oprnd0 is defined by a (widen_)mult_expr */
937 if (!oprnd0)
938 return NULL;
940 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
941 if (!mult_vinfo)
942 return NULL;
944 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
945 inside the loop (in case we are analyzing an outer-loop). */
946 vect_unpromoted_value unprom0[2];
947 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
948 false, 2, unprom0, &half_type))
949 return NULL;
951 /* If there are two widening operations, make sure they agree on
952 the sign of the extension. */
953 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
954 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
955 return NULL;
957 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
959 tree half_vectype;
960 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
961 type_out, &half_vectype))
962 return NULL;
964 /* Get the inputs in the appropriate types. */
965 tree mult_oprnd[2];
966 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
967 unprom0, half_vectype);
969 var = vect_recog_temp_ssa_var (type, NULL);
970 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
971 mult_oprnd[0], mult_oprnd[1], oprnd1);
973 return pattern_stmt;
977 /* Function vect_recog_sad_pattern
979 Try to find the following Sum of Absolute Difference (SAD) pattern:
981 type x_t, y_t;
982 signed TYPE1 diff, abs_diff;
983 TYPE2 sum = init;
984 loop:
985 sum_0 = phi <init, sum_1>
986 S1 x_t = ...
987 S2 y_t = ...
988 S3 x_T = (TYPE1) x_t;
989 S4 y_T = (TYPE1) y_t;
990 S5 diff = x_T - y_T;
991 S6 abs_diff = ABS_EXPR <diff>;
992 [S7 abs_diff = (TYPE2) abs_diff; #optional]
993 S8 sum_1 = abs_diff + sum_0;
995 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
996 same size as 'TYPE1' or bigger. This is a special case of a reduction
997 computation.
999 Input:
1001 * STMT_VINFO: The stmt from which the pattern search begins. In the
1002 example, when this function is called with S8, the pattern
1003 {S3,S4,S5,S6,S7,S8} will be detected.
1005 Output:
1007 * TYPE_OUT: The type of the output of this pattern.
1009 * Return value: A new stmt that will be used to replace the sequence of
1010 stmts that constitute the pattern. In this case it will be:
1011 SAD_EXPR <x_t, y_t, sum_0>
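/* For example, on a target with a vector SAD instruction, the loop

     unsigned char a[256], b[256];
     int sum = 0;
     for (int i = 0; i < 256; ++i)
       sum += abs (a[i] - b[i]);

   matches this pattern: the subtraction happens in int after the usual
   promotions (S3-S5), the absolute value is S6 and the accumulation is S8,
   giving SAD_EXPR <a[i], b[i], sum_0>.  */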
1014 static gimple *
1015 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1017 gimple *last_stmt = stmt_vinfo->stmt;
1018 vec_info *vinfo = stmt_vinfo->vinfo;
1019 tree half_type;
1021 /* Look for the following pattern
1022 DX = (TYPE1) X;
1023 DY = (TYPE1) Y;
1024 DDIFF = DX - DY;
1025 DAD = ABS_EXPR <DDIFF>;
1026 DAD = (TYPE2) DAD;
1027 sum_1 = DAD + sum_0;
1028 In which
1029 - DX is at least double the size of X
1030 - DY is at least double the size of Y
1031 - DX, DY, DDIFF, DAD all have the same type
1032 - sum is the same size as DAD or bigger
1033 - sum has been recognized as a reduction variable.
1035 This is equivalent to:
1036 DDIFF = X w- Y; #widen sub
1037 DAD = ABS_EXPR <DDIFF>;
1038 sum_1 = DAD w+ sum_0; #widen summation
1040 DDIFF = X w- Y; #widen sub
1041 DAD = ABS_EXPR <DDIFF>;
1042 sum_1 = DAD + sum_0; #summation
1045 /* Starting from LAST_STMT, follow the defs of its uses in search
1046 of the above pattern. */
1048 tree plus_oprnd0, plus_oprnd1;
1049 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1050 &plus_oprnd0, &plus_oprnd1))
1051 return NULL;
1053 tree sum_type = gimple_expr_type (last_stmt);
1055 /* Any non-truncating sequence of conversions is OK here, since
1056 with a successful match, the result of the ABS(U) is known to fit
1057 within the nonnegative range of the result type. (It cannot be the
1058 negative of the minimum signed value due to the range of the widening
1059 MINUS_EXPR.) */
1060 vect_unpromoted_value unprom_abs;
1061 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1062 &unprom_abs);
1064 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1065 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1066 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1067 Then check that plus_oprnd0 is defined by an abs_expr. */
1069 if (!plus_oprnd0)
1070 return NULL;
1072 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1073 if (!abs_stmt_vinfo)
1074 return NULL;
1076 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1077 inside the loop (in case we are analyzing an outer-loop). */
1078 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1079 if (!abs_stmt
1080 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1081 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1082 return NULL;
1084 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1085 tree abs_type = TREE_TYPE (abs_oprnd);
1086 if (TYPE_UNSIGNED (abs_type))
1087 return NULL;
1089 /* Peel off conversions from the ABS input. This can involve sign
1090 changes (e.g. from an unsigned subtraction to a signed ABS input)
1091 or signed promotion, but it can't include unsigned promotion.
1092 (Note that ABS of an unsigned promotion should have been folded
1093 away before now anyway.) */
1094 vect_unpromoted_value unprom_diff;
1095 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1096 &unprom_diff);
1097 if (!abs_oprnd)
1098 return NULL;
1099 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1100 && TYPE_UNSIGNED (unprom_diff.type))
1101 return NULL;
1103 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1104 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1105 if (!diff_stmt_vinfo)
1106 return NULL;
1108 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1109 inside the loop (in case we are analyzing an outer-loop). */
1110 vect_unpromoted_value unprom[2];
1111 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1112 false, 2, unprom, &half_type))
1113 return NULL;
1115 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1117 tree half_vectype;
1118 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1119 type_out, &half_vectype))
1120 return NULL;
1122 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1123 tree sad_oprnd[2];
1124 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1125 unprom, half_vectype);
1127 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1128 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1129 sad_oprnd[1], plus_oprnd1);
1131 return pattern_stmt;
1134 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1135 so that it can be treated as though it had the form:
1137 A_TYPE a;
1138 B_TYPE b;
1139 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1140 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1141 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1142 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1143 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1145 Try to replace the pattern with:
1147 A_TYPE a;
1148 B_TYPE b;
1149 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1150 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1151 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1152 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1154 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1156 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1157 name of the pattern being matched, for dump purposes. */
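/* For example, vect_recog_widen_mult_pattern below uses this helper with
   ORIG_CODE = MULT_EXPR and WIDE_CODE = WIDEN_MULT_EXPR, so that a loop
   such as

     short a[256], b[256];
     int c[256];
     for (int i = 0; i < 256; ++i)
       c[i] = a[i] * b[i];

   is rewritten to use a single widening multiplication from short to int
   rather than two promotions followed by a full-width multiplication,
   provided the target supports the widening operation.  */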
1159 static gimple *
1160 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1161 tree_code orig_code, tree_code wide_code,
1162 bool shift_p, const char *name)
1164 gimple *last_stmt = last_stmt_info->stmt;
1166 vect_unpromoted_value unprom[2];
1167 tree half_type;
1168 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1169 shift_p, 2, unprom, &half_type))
1170 return NULL;
1172 /* Pattern detected. */
1173 vect_pattern_detected (name, last_stmt);
1175 tree type = gimple_expr_type (last_stmt);
1176 tree itype = type;
1177 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1178 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1179 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1180 TYPE_UNSIGNED (half_type));
1182 /* Check target support */
1183 tree vectype = get_vectype_for_scalar_type (half_type);
1184 tree vecitype = get_vectype_for_scalar_type (itype);
1185 enum tree_code dummy_code;
1186 int dummy_int;
1187 auto_vec<tree> dummy_vec;
1188 if (!vectype
1189 || !vecitype
1190 || !supportable_widening_operation (wide_code, last_stmt_info,
1191 vecitype, vectype,
1192 &dummy_code, &dummy_code,
1193 &dummy_int, &dummy_vec))
1194 return NULL;
1196 *type_out = get_vectype_for_scalar_type (type);
1197 if (!*type_out)
1198 return NULL;
1200 tree oprnd[2];
1201 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1203 tree var = vect_recog_temp_ssa_var (itype, NULL);
1204 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1205 oprnd[0], oprnd[1]);
1207 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1210 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1211 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1213 static gimple *
1214 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1216 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1217 WIDEN_MULT_EXPR, false,
1218 "vect_recog_widen_mult_pattern");
1221 /* Function vect_recog_pow_pattern
1223 Try to find the following pattern:
1225 x = POW (y, N);
1227 with POW being one of pow, powf, powi, powif and N being
1228 either 2 or 0.5.
1230 Input:
1232 * STMT_VINFO: The stmt from which the pattern search begins.
1234 Output:
1236 * TYPE_OUT: The type of the output of this pattern.
1238 * Return value: A new stmt that will be used to replace the sequence of
1239 stmts that constitute the pattern. In this case it will be:
1240 x = y * y
1242 x = sqrt (y)
1245 static gimple *
1246 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1248 gimple *last_stmt = stmt_vinfo->stmt;
1249 tree base, exp;
1250 gimple *stmt;
1251 tree var;
1253 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1254 return NULL;
1256 switch (gimple_call_combined_fn (last_stmt))
1258 CASE_CFN_POW:
1259 CASE_CFN_POWI:
1260 break;
1262 default:
1263 return NULL;
1266 base = gimple_call_arg (last_stmt, 0);
1267 exp = gimple_call_arg (last_stmt, 1);
1268 if (TREE_CODE (exp) != REAL_CST
1269 && TREE_CODE (exp) != INTEGER_CST)
1271 if (flag_unsafe_math_optimizations
1272 && TREE_CODE (base) == REAL_CST
1273 && !gimple_call_internal_p (last_stmt))
1275 combined_fn log_cfn;
1276 built_in_function exp_bfn;
1277 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1279 case BUILT_IN_POW:
1280 log_cfn = CFN_BUILT_IN_LOG;
1281 exp_bfn = BUILT_IN_EXP;
1282 break;
1283 case BUILT_IN_POWF:
1284 log_cfn = CFN_BUILT_IN_LOGF;
1285 exp_bfn = BUILT_IN_EXPF;
1286 break;
1287 case BUILT_IN_POWL:
1288 log_cfn = CFN_BUILT_IN_LOGL;
1289 exp_bfn = BUILT_IN_EXPL;
1290 break;
1291 default:
1292 return NULL;
1294 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1295 tree exp_decl = builtin_decl_implicit (exp_bfn);
1296 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1297 does that, but if C is a power of 2, we want to use
1298 exp2 (log2 (C) * x) in the non-vectorized version, but for
1299 vectorization we don't have vectorized exp2. */
1300 if (logc
1301 && TREE_CODE (logc) == REAL_CST
1302 && exp_decl
1303 && lookup_attribute ("omp declare simd",
1304 DECL_ATTRIBUTES (exp_decl)))
1306 cgraph_node *node = cgraph_node::get_create (exp_decl);
1307 if (node->simd_clones == NULL)
1309 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1310 || node->definition)
1311 return NULL;
1312 expand_simd_clones (node);
1313 if (node->simd_clones == NULL)
1314 return NULL;
1316 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1317 if (!*type_out)
1318 return NULL;
1319 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1320 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1321 append_pattern_def_seq (stmt_vinfo, g);
1322 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1323 g = gimple_build_call (exp_decl, 1, def);
1324 gimple_call_set_lhs (g, res);
1325 return g;
1329 return NULL;
1332 /* We now have a pow or powi builtin function call with a constant
1333 exponent. */
1335 /* Catch squaring. */
1336 if ((tree_fits_shwi_p (exp)
1337 && tree_to_shwi (exp) == 2)
1338 || (TREE_CODE (exp) == REAL_CST
1339 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1341 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1342 TREE_TYPE (base), type_out))
1343 return NULL;
1345 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1346 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1347 return stmt;
1350 /* Catch square root. */
1351 if (TREE_CODE (exp) == REAL_CST
1352 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1354 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1355 if (*type_out
1356 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1357 OPTIMIZE_FOR_SPEED))
1359 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1360 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1361 gimple_call_set_lhs (stmt, var);
1362 gimple_call_set_nothrow (stmt, true);
1363 return stmt;
1367 return NULL;
1371 /* Function vect_recog_widen_sum_pattern
1373 Try to find the following pattern:
1375 type x_t;
1376 TYPE x_T, sum = init;
1377 loop:
1378 sum_0 = phi <init, sum_1>
1379 S1 x_t = *p;
1380 S2 x_T = (TYPE) x_t;
1381 S3 sum_1 = x_T + sum_0;
1383 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1384 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1385 a special case of a reduction computation.
1387 Input:
1389 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1390 when this function is called with S3, the pattern {S2,S3} will be detected.
1392 Output:
1394 * TYPE_OUT: The type of the output of this pattern.
1396 * Return value: A new stmt that will be used to replace the sequence of
1397 stmts that constitute the pattern. In this case it will be:
1398 WIDEN_SUM <x_t, sum_0>
1400 Note: The widening-sum idiom is a widening reduction pattern that is
1401 vectorized without preserving all the intermediate results. It
1402 produces only N/2 (widened) results (by summing up pairs of
1403 intermediate results) rather than all N results. Therefore, we
1404 cannot allow this pattern when we want to get all the results and in
1405 the correct order (as is the case when this computation is in an
1406 inner-loop nested in an outer-loop that is being vectorized). */
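/* For example, on a target with a widening-sum optab, the loop

     short x[256];
     int sum = 0;
     for (int i = 0; i < 256; ++i)
       sum += x[i];

   matches this pattern (S2 is the implicit promotion to int, S3 the
   accumulation) and is rewritten to use WIDEN_SUM_EXPR <x[i], sum_0>.  */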
1408 static gimple *
1409 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1411 gimple *last_stmt = stmt_vinfo->stmt;
1412 tree oprnd0, oprnd1;
1413 vec_info *vinfo = stmt_vinfo->vinfo;
1414 tree type;
1415 gimple *pattern_stmt;
1416 tree var;
1418 /* Look for the following pattern
1419 DX = (TYPE) X;
1420 sum_1 = DX + sum_0;
1421 In which DX is at least double the size of X, and sum_1 has been
1422 recognized as a reduction variable.
1425 /* Starting from LAST_STMT, follow the defs of its uses in search
1426 of the above pattern. */
1428 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1429 &oprnd0, &oprnd1))
1430 return NULL;
1432 type = gimple_expr_type (last_stmt);
1434 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1435 we know that oprnd1 is the reduction variable (defined by a loop-header
1436 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1437 Left to check that oprnd0 is defined by a cast from type 'type' to type
1438 'TYPE'. */
1440 vect_unpromoted_value unprom0;
1441 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1442 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1443 return NULL;
1445 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1447 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1448 type_out))
1449 return NULL;
1451 var = vect_recog_temp_ssa_var (type, NULL);
1452 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1454 return pattern_stmt;
1457 /* Recognize cases in which an operation is performed in one type WTYPE
1458 but could be done more efficiently in a narrower type NTYPE. For example,
1459 if we have:
1461 ATYPE a; // narrower than NTYPE
1462 BTYPE b; // narrower than NTYPE
1463 WTYPE aw = (WTYPE) a;
1464 WTYPE bw = (WTYPE) b;
1465 WTYPE res = aw + bw; // only uses of aw and bw
1467 then it would be more efficient to do:
1469 NTYPE an = (NTYPE) a;
1470 NTYPE bn = (NTYPE) b;
1471 NTYPE resn = an + bn;
1472 WTYPE res = (WTYPE) resn;
1474 Other situations include things like:
1476 ATYPE a; // NTYPE or narrower
1477 WTYPE aw = (WTYPE) a;
1478 WTYPE res = aw + b;
1480 when only "(NTYPE) res" is significant. In that case it's more efficient
1481 to truncate "b" and do the operation on NTYPE instead:
1483 NTYPE an = (NTYPE) a;
1484 NTYPE bn = (NTYPE) b; // truncation
1485 NTYPE resn = an + bn;
1486 WTYPE res = (WTYPE) resn;
1488 All users of "res" should then use "resn" instead, making the final
1489 statement dead (not marked as relevant). The final statement is still
1490 needed to maintain the type correctness of the IR.
1492 vect_determine_precisions has already determined the minimum
1493 precision of the operation and the minimum precision required
1494 by users of the result. */
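/* For example, in

     unsigned char a[256], b[256], c[256];
     for (int i = 0; i < 256; ++i)
       c[i] = a[i] + b[i];

   the addition is performed in int only because of C's integer promotions;
   since just the low 8 bits of the result are stored, the addition can be
   narrowed (here to an 8-bit or 16-bit operation, depending on the
   precisions computed earlier) with the widening cast moved after it.  */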
1496 static gimple *
1497 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1499 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1500 if (!last_stmt)
1501 return NULL;
1503 /* See whether we have found that this operation can be done on a
1504 narrower type without changing its semantics. */
1505 unsigned int new_precision = last_stmt_info->operation_precision;
1506 if (!new_precision)
1507 return NULL;
1509 vec_info *vinfo = last_stmt_info->vinfo;
1510 tree lhs = gimple_assign_lhs (last_stmt);
1511 tree type = TREE_TYPE (lhs);
1512 tree_code code = gimple_assign_rhs_code (last_stmt);
1514 /* Keep the first operand of a COND_EXPR as-is: only the other two
1515 operands are interesting. */
1516 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1518 /* Check the operands. */
1519 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1520 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1521 unprom.quick_grow (nops);
1522 unsigned int min_precision = 0;
1523 bool single_use_p = false;
1524 for (unsigned int i = 0; i < nops; ++i)
1526 tree op = gimple_op (last_stmt, first_op + i);
1527 if (TREE_CODE (op) == INTEGER_CST)
1528 unprom[i].set_op (op, vect_constant_def);
1529 else if (TREE_CODE (op) == SSA_NAME)
1531 bool op_single_use_p = true;
1532 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1533 &op_single_use_p))
1534 return NULL;
1535 /* If:
1537 (1) N bits of the result are needed;
1538 (2) all inputs are widened from M<N bits; and
1539 (3) one operand OP is a single-use SSA name
1541 we can shift the M->N widening from OP to the output
1542 without changing the number or type of extensions involved.
1543 This then reduces the number of copies of STMT_INFO.
1545 If instead of (3) more than one operand is a single-use SSA name,
1546 shifting the extension to the output is even more of a win.
1548 If instead:
1550 (1) N bits of the result are needed;
1551 (2) one operand OP2 is widened from M2<N bits;
1552 (3) another operand OP1 is widened from M1<M2 bits; and
1553 (4) both OP1 and OP2 are single-use
1555 the choice is between:
1557 (a) truncating OP2 to M1, doing the operation on M1,
1558 and then widening the result to N
1560 (b) widening OP1 to M2, doing the operation on M2, and then
1561 widening the result to N
1563 Both shift the M2->N widening of the inputs to the output.
1564 (a) additionally shifts the M1->M2 widening to the output;
1565 it requires fewer copies of STMT_INFO but requires an extra
1566 M2->M1 truncation.
1568 Which is better will depend on the complexity and cost of
1569 STMT_INFO, which is hard to predict at this stage. However,
1570 a clear tie-breaker in favor of (b) is the fact that the
1571 truncation in (a) increases the length of the operation chain.
1573 If instead of (4) only one of OP1 or OP2 is single-use,
1574 (b) is still a win over doing the operation in N bits:
1575 it still shifts the M2->N widening on the single-use operand
1576 to the output and reduces the number of STMT_INFO copies.
1578 If neither operand is single-use then operating on fewer than
1579 N bits might lead to more extensions overall. Whether it does
1580 or not depends on global information about the vectorization
1581 region, and whether that's a good trade-off would again
1582 depend on the complexity and cost of the statements involved,
1583 as well as things like register pressure that are not normally
1584 modelled at this stage. We therefore ignore these cases
1585 and just optimize the clear single-use wins above.
1587 Thus we take the maximum precision of the unpromoted operands
1588 and record whether any operand is single-use. */
1589 if (unprom[i].dt == vect_internal_def)
1591 min_precision = MAX (min_precision,
1592 TYPE_PRECISION (unprom[i].type));
1593 single_use_p |= op_single_use_p;
1598 /* Although the operation could be done in operation_precision, we have
1599 to balance that against introducing extra truncations or extensions.
1600 Calculate the minimum precision that can be handled efficiently.
1602 The loop above determined that the operation could be handled
1603 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1604 extension from the inputs to the output without introducing more
1605 instructions, and would reduce the number of instructions required
1606 for STMT_INFO itself.
1608 vect_determine_precisions has also determined that the result only
1609 needs min_output_precision bits. Truncating by a factor of N times
1610 requires a tree of N - 1 instructions, so if TYPE is N times wider
1611 than min_output_precision, doing the operation in TYPE and truncating
1612 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1613 In contrast:
1615 - truncating the input to a unary operation and doing the operation
1616 in the new type requires at most N - 1 + 1 = N instructions per
1617 output vector
1619 - doing the same for a binary operation requires at most
1620 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1622 Both unary and binary operations require fewer instructions than
1623 this if the operands were extended from a suitable truncated form.
1624 Thus there is usually nothing to lose by doing operations in
1625 min_output_precision bits, but there can be something to gain. */
1626 if (!single_use_p)
1627 min_precision = last_stmt_info->min_output_precision;
1628 else
1629 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1631 /* Apply the minimum efficient precision we just calculated. */
1632 if (new_precision < min_precision)
1633 new_precision = min_precision;
1634 if (new_precision >= TYPE_PRECISION (type))
1635 return NULL;
1637 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1639 *type_out = get_vectype_for_scalar_type (type);
1640 if (!*type_out)
1641 return NULL;
1643 /* We've found a viable pattern. Get the new type of the operation. */
1644 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1645 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1647 /* We specifically don't check here whether the target supports the
1648 new operation, since it might be something that a later pattern
1649 wants to rewrite anyway. If targets have a minimum element size
1650 for some optabs, we should pattern-match smaller ops to larger ops
1651 where beneficial. */
1652 tree new_vectype = get_vectype_for_scalar_type (new_type);
1653 if (!new_vectype)
1654 return NULL;
1656 if (dump_enabled_p ())
1657 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1658 type, new_type);
1660 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1661 tree ops[3] = {};
1662 for (unsigned int i = 1; i < first_op; ++i)
1663 ops[i - 1] = gimple_op (last_stmt, i);
1664 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1665 new_type, &unprom[0], new_vectype);
1667 /* Use the operation to produce a result of type NEW_TYPE. */
1668 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1669 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1670 ops[0], ops[1], ops[2]);
1671 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1673 if (dump_enabled_p ())
1674 dump_printf_loc (MSG_NOTE, vect_location,
1675 "created pattern stmt: %G", pattern_stmt);
1677 pattern_stmt = vect_convert_output (last_stmt_info, type,
1678 pattern_stmt, new_vectype);
1680 return pattern_stmt;
1683 /* Recognize the patterns:
1685 ATYPE a; // narrower than TYPE
1686 BTYPE b; // narrower than TYPE
1687 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1688 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1690 where only the bottom half of avg is used. Try to transform them into:
1692 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1693 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1695 followed by:
1697 TYPE avg = (TYPE) avg';
1699 where NTYPE is no wider than half of TYPE. Since only the bottom half
1700 of avg is used, all or part of the cast of avg' should become redundant. */
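/* For example,

     unsigned char a[256], b[256], c[256];
     for (int i = 0; i < 256; ++i)
       c[i] = (a[i] + b[i] + 1) >> 1;

   computes a rounding average whose upper bits are discarded by the store,
   so on targets that support IFN_AVG_CEIL it matches form (2) above;
   without the "+ 1" it matches form (1) and uses IFN_AVG_FLOOR.  */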
1702 static gimple *
1703 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1705 /* Check for a shift right by one bit. */
1706 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1707 vec_info *vinfo = last_stmt_info->vinfo;
1708 if (!last_stmt
1709 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1710 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1711 return NULL;
1713 /* Check that the shift result is wider than the users of the
1714 result need (i.e. that narrowing would be a natural choice). */
1715 tree lhs = gimple_assign_lhs (last_stmt);
1716 tree type = TREE_TYPE (lhs);
1717 unsigned int target_precision
1718 = vect_element_precision (last_stmt_info->min_output_precision);
1719 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1720 return NULL;
1722 /* Get the definition of the shift input. */
1723 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1724 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1725 if (!plus_stmt_info)
1726 return NULL;
1728 /* Check whether the shift input can be seen as a tree of additions on
1729 2 or 3 widened inputs.
1731 Note that the pattern should be a win even if the result of one or
1732 more additions is reused elsewhere: if the pattern matches, we'd be
1733 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1734 internal_fn ifn = IFN_AVG_FLOOR;
1735 vect_unpromoted_value unprom[3];
1736 tree new_type;
1737 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1738 PLUS_EXPR, false, 3,
1739 unprom, &new_type);
1740 if (nops == 0)
1741 return NULL;
1742 if (nops == 3)
1744 /* Check that one operand is 1. */
1745 unsigned int i;
1746 for (i = 0; i < 3; ++i)
1747 if (integer_onep (unprom[i].op))
1748 break;
1749 if (i == 3)
1750 return NULL;
1751 /* Throw away the 1 operand and keep the other two. */
1752 if (i < 2)
1753 unprom[i] = unprom[2];
1754 ifn = IFN_AVG_CEIL;
1757 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1759 /* We know that:
1761 (a) the operation can be viewed as:
1763 TYPE widened0 = (TYPE) UNPROM[0];
1764 TYPE widened1 = (TYPE) UNPROM[1];
1765 TYPE tmp1 = widened0 + widened1 {+ 1};
1766 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1768 (b) the first two statements are equivalent to:
1770 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1771 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1773 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1774 where sensible;
1776 (d) all the operations can be performed correctly at twice the width of
1777 NEW_TYPE, due to the nature of the average operation; and
1779 (e) users of the result of the right shift need only TARGET_PRECISION
1780 bits, where TARGET_PRECISION is no more than half of TYPE's
1781 precision.
1783 Under these circumstances, the only situation in which NEW_TYPE
1784 could be narrower than TARGET_PRECISION is if widened0, widened1
1785 and an addition result are all used more than once. Thus we can
1786 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1787 as "free", whereas widening the result of the average instruction
1788 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1789 therefore better not to go narrower than TARGET_PRECISION. */
1790 if (TYPE_PRECISION (new_type) < target_precision)
1791 new_type = build_nonstandard_integer_type (target_precision,
1792 TYPE_UNSIGNED (new_type));
1794 /* Check for target support. */
1795 tree new_vectype = get_vectype_for_scalar_type (new_type);
1796 if (!new_vectype
1797 || !direct_internal_fn_supported_p (ifn, new_vectype,
1798 OPTIMIZE_FOR_SPEED))
1799 return NULL;
1801 /* The IR requires a valid vector type for the cast result, even though
1802 it's likely to be discarded. */
1803 *type_out = get_vectype_for_scalar_type (type);
1804 if (!*type_out)
1805 return NULL;
1807 /* Generate the IFN_AVG* call. */
1808 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1809 tree new_ops[2];
1810 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1811 unprom, new_vectype);
1812 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1813 new_ops[1]);
1814 gimple_call_set_lhs (average_stmt, new_var);
1815 gimple_set_location (average_stmt, gimple_location (last_stmt));
1817 if (dump_enabled_p ())
1818 dump_printf_loc (MSG_NOTE, vect_location,
1819 "created pattern stmt: %G", average_stmt);
1821 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1824 /* Recognize cases in which the input to a cast is wider than its
1825 output, and the input is fed by a widening operation. Fold this
1826 by removing the unnecessary intermediate widening. E.g.:
1828 unsigned char a;
1829 unsigned int b = (unsigned int) a;
1830 unsigned short c = (unsigned short) b;
1832 -->
1834 unsigned short c = (unsigned short) a;
1836 Although this is rare in input IR, it is an expected side-effect
1837 of the over-widening pattern above.
1839 This is beneficial also for integer-to-float conversions, if the
1840 widened integer has more bits than the float, and if the unwidened
1841 input doesn't. */
1843 static gimple *
1844 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1846 /* Check for a cast, including an integer-to-float conversion. */
1847 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1848 if (!last_stmt)
1849 return NULL;
1850 tree_code code = gimple_assign_rhs_code (last_stmt);
1851 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1852 return NULL;
1854 /* Make sure that the rhs is a scalar with a natural bitsize. */
1855 tree lhs = gimple_assign_lhs (last_stmt);
1856 if (!lhs)
1857 return NULL;
1858 tree lhs_type = TREE_TYPE (lhs);
1859 scalar_mode lhs_mode;
1860 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1861 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1862 return NULL;
1864 /* Check for a narrowing operation (from a vector point of view). */
1865 tree rhs = gimple_assign_rhs1 (last_stmt);
1866 tree rhs_type = TREE_TYPE (rhs);
1867 if (!INTEGRAL_TYPE_P (rhs_type)
1868 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1869 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1870 return NULL;
1872 /* Try to find an unpromoted input. */
1873 vec_info *vinfo = last_stmt_info->vinfo;
1874 vect_unpromoted_value unprom;
1875 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1876 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1877 return NULL;
1879 /* If the bits above RHS_TYPE matter, make sure that they're the
1880 same when extending from UNPROM as they are when extending from RHS. */
1881 if (!INTEGRAL_TYPE_P (lhs_type)
1882 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1883 return NULL;
1885 /* We can get the same result by casting UNPROM directly, to avoid
1886 the unnecessary widening and narrowing. */
1887 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1889 *type_out = get_vectype_for_scalar_type (lhs_type);
1890 if (!*type_out)
1891 return NULL;
1893 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1894 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1895 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1897 return pattern_stmt;
1900 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1901 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1903 static gimple *
1904 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1906 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1907 WIDEN_LSHIFT_EXPR, true,
1908 "vect_recog_widen_shift_pattern");
1911 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1913 type a_t, b_t, c_t;
1915 S0 a_t = b_t r<< c_t;
1917 Input/Output:
1919 * STMT_VINFO: The stmt from which the pattern search begins,
1920 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1921 with a sequence:
1923 S1 d_t = -c_t;
1924 S2 e_t = d_t & (B - 1);
1925 S3 f_t = b_t << c_t;
1926 S4 g_t = b_t >> e_t;
1927 S0 a_t = f_t | g_t;
1929 where B is element bitsize of type.
1931 Output:
1933 * TYPE_OUT: The type of the output of this pattern.
1935 * Return value: A new stmt that will be used to replace the rotate
1936 S0 stmt. */
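/* Illustrative only (an assumed example, not from the original sources):
   a constant-amount rotate idiom such as

     void
     rot3 (unsigned int *x, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = (x[i] << 3) | (x[i] >> 29);
     }

   is folded to x[i] r<< 3 earlier in the middle end; if the target has
   no vector rotate, this pattern re-expands it into the shift/or
   sequence S1-S4/S0 above so the loop can still be vectorized.  */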
1938 static gimple *
1939 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1941 gimple *last_stmt = stmt_vinfo->stmt;
1942 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1943 gimple *pattern_stmt, *def_stmt;
1944 enum tree_code rhs_code;
1945 vec_info *vinfo = stmt_vinfo->vinfo;
1946 enum vect_def_type dt;
1947 optab optab1, optab2;
1948 edge ext_def = NULL;
1950 if (!is_gimple_assign (last_stmt))
1951 return NULL;
1953 rhs_code = gimple_assign_rhs_code (last_stmt);
1954 switch (rhs_code)
1956 case LROTATE_EXPR:
1957 case RROTATE_EXPR:
1958 break;
1959 default:
1960 return NULL;
1963 lhs = gimple_assign_lhs (last_stmt);
1964 oprnd0 = gimple_assign_rhs1 (last_stmt);
1965 type = TREE_TYPE (oprnd0);
1966 oprnd1 = gimple_assign_rhs2 (last_stmt);
1967 if (TREE_CODE (oprnd0) != SSA_NAME
1968 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1969 || !INTEGRAL_TYPE_P (type)
1970 || !TYPE_UNSIGNED (type))
1971 return NULL;
1973 stmt_vec_info def_stmt_info;
1974 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
1975 return NULL;
1977 if (dt != vect_internal_def
1978 && dt != vect_constant_def
1979 && dt != vect_external_def)
1980 return NULL;
1982 vectype = get_vectype_for_scalar_type (type);
1983 if (vectype == NULL_TREE)
1984 return NULL;
1986 /* If vector/vector or vector/scalar rotate is supported by the target,
1987 don't do anything here. */
1988 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1989 if (optab1
1990 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1991 return NULL;
1993 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1995 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1996 if (optab2
1997 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1998 return NULL;
2001 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2002 don't do anything here either. */
2003 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2004 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2005 if (!optab1
2006 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2007 || !optab2
2008 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2010 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2011 return NULL;
2012 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2013 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2014 if (!optab1
2015 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2016 || !optab2
2017 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2018 return NULL;
2021 *type_out = vectype;
2023 if (dt == vect_external_def
2024 && TREE_CODE (oprnd1) == SSA_NAME)
2025 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2027 def = NULL_TREE;
2028 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2029 if (TREE_CODE (oprnd1) == INTEGER_CST
2030 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2031 def = oprnd1;
2032 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2034 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2035 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2036 && TYPE_PRECISION (TREE_TYPE (rhs1))
2037 == TYPE_PRECISION (type))
2038 def = rhs1;
2041 if (def == NULL_TREE)
2043 def = vect_recog_temp_ssa_var (type, NULL);
2044 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2045 if (ext_def)
2047 basic_block new_bb
2048 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2049 gcc_assert (!new_bb);
2051 else
2052 append_pattern_def_seq (stmt_vinfo, def_stmt);
2054 stype = TREE_TYPE (def);
2055 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2057 if (TREE_CODE (def) == INTEGER_CST)
2059 if (!tree_fits_uhwi_p (def)
2060 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2061 || integer_zerop (def))
2062 return NULL;
2063 def2 = build_int_cst (stype,
2064 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2066 else
2068 tree vecstype = get_vectype_for_scalar_type (stype);
2070 if (vecstype == NULL_TREE)
2071 return NULL;
2072 def2 = vect_recog_temp_ssa_var (stype, NULL);
2073 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2074 if (ext_def)
2076 basic_block new_bb
2077 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2078 gcc_assert (!new_bb);
2080 else
2081 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2083 def2 = vect_recog_temp_ssa_var (stype, NULL);
2084 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2085 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2086 gimple_assign_lhs (def_stmt), mask);
2087 if (ext_def)
2089 basic_block new_bb
2090 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2091 gcc_assert (!new_bb);
2093 else
2094 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2097 var1 = vect_recog_temp_ssa_var (type, NULL);
2098 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2099 ? LSHIFT_EXPR : RSHIFT_EXPR,
2100 oprnd0, def);
2101 append_pattern_def_seq (stmt_vinfo, def_stmt);
2103 var2 = vect_recog_temp_ssa_var (type, NULL);
2104 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2105 ? RSHIFT_EXPR : LSHIFT_EXPR,
2106 oprnd0, def2);
2107 append_pattern_def_seq (stmt_vinfo, def_stmt);
2109 /* Pattern detected. */
2110 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2112 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2113 var = vect_recog_temp_ssa_var (type, NULL);
2114 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2116 return pattern_stmt;
2119 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2120 vectorized:
2122 type a_t;
2123 TYPE b_T, res_T;
2125 S1 a_t = ;
2126 S2 b_T = ;
2127 S3 res_T = b_T op a_t;
2129 where type 'TYPE' is a type with different size than 'type',
2130 and op is <<, >> or rotate.
2132 Also detect cases:
2134 type a_t;
2135 TYPE b_T, c_T, res_T;
2137 S0 c_T = ;
2138 S1 a_t = (type) c_T;
2139 S2 b_T = ;
2140 S3 res_T = b_T op a_t;
2142 Input/Output:
2144 * STMT_VINFO: The stmt from which the pattern search begins,
2145 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2146 with a shift/rotate that has the same type on both operands; in the
2147 second case just b_T op c_T, in the first case with an added cast
2148 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2150 Output:
2152 * TYPE_OUT: The type of the output of this pattern.
2154 * Return value: A new stmt that will be used to replace the shift/rotate
2155 S3 stmt. */
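/* Illustrative only (an assumed example, not from the original sources):
   a loop in which the shift amount has a narrower type than the
   shifted value,

     void
     shift_var (long long *restrict x, int *restrict amt, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = x[i] << amt[i];
     }

   gives a GIMPLE shift whose two operands have different modes (the
   first form above, with 'type' = int and 'TYPE' = long long); the
   pattern adds a cast of the amount so that both operands of the
   vector shift share one vector type.  */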
2157 static gimple *
2158 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2159 tree *type_out)
2161 gimple *last_stmt = stmt_vinfo->stmt;
2162 tree oprnd0, oprnd1, lhs, var;
2163 gimple *pattern_stmt;
2164 enum tree_code rhs_code;
2165 vec_info *vinfo = stmt_vinfo->vinfo;
2167 if (!is_gimple_assign (last_stmt))
2168 return NULL;
2170 rhs_code = gimple_assign_rhs_code (last_stmt);
2171 switch (rhs_code)
2173 case LSHIFT_EXPR:
2174 case RSHIFT_EXPR:
2175 case LROTATE_EXPR:
2176 case RROTATE_EXPR:
2177 break;
2178 default:
2179 return NULL;
2182 lhs = gimple_assign_lhs (last_stmt);
2183 oprnd0 = gimple_assign_rhs1 (last_stmt);
2184 oprnd1 = gimple_assign_rhs2 (last_stmt);
2185 if (TREE_CODE (oprnd0) != SSA_NAME
2186 || TREE_CODE (oprnd1) != SSA_NAME
2187 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2188 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2189 || TYPE_PRECISION (TREE_TYPE (lhs))
2190 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2191 return NULL;
2193 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2194 if (!def_vinfo)
2195 return NULL;
2197 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2198 if (*type_out == NULL_TREE)
2199 return NULL;
2201 tree def = NULL_TREE;
2202 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2203 if (def_stmt && gimple_assign_cast_p (def_stmt))
2205 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2206 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2207 && TYPE_PRECISION (TREE_TYPE (rhs1))
2208 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2210 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2211 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2212 def = rhs1;
2213 else
2215 tree mask
2216 = build_low_bits_mask (TREE_TYPE (rhs1),
2217 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2218 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2219 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2220 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2221 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2226 if (def == NULL_TREE)
2228 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2229 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2230 append_pattern_def_seq (stmt_vinfo, def_stmt);
2233 /* Pattern detected. */
2234 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2236 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2237 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2238 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2240 return pattern_stmt;
2243 /* Return true iff the target has a vector optab implementing the operation
2244 CODE on type VECTYPE. */
2246 static bool
2247 target_has_vecop_for_code (tree_code code, tree vectype)
2249 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2250 return voptab
2251 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2254 /* Verify that the target has optabs of VECTYPE to perform all the steps
2255 needed by the multiplication-by-immediate synthesis algorithm described by
2256 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2257 present. Return true iff the target supports all the steps. */
2259 static bool
2260 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2261 tree vectype, bool synth_shift_p)
2263 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2264 return false;
2266 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2267 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2269 if (var == negate_variant
2270 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2271 return false;
2273 /* If we must synthesize shifts with additions make sure that vector
2274 addition is available. */
2275 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2276 return false;
2278 for (int i = 1; i < alg->ops; i++)
2280 switch (alg->op[i])
2282 case alg_shift:
2283 break;
2284 case alg_add_t_m2:
2285 case alg_add_t2_m:
2286 case alg_add_factor:
2287 if (!supports_vplus)
2288 return false;
2289 break;
2290 case alg_sub_t_m2:
2291 case alg_sub_t2_m:
2292 case alg_sub_factor:
2293 if (!supports_vminus)
2294 return false;
2295 break;
2296 case alg_unknown:
2297 case alg_m:
2298 case alg_zero:
2299 case alg_impossible:
2300 return false;
2301 default:
2302 gcc_unreachable ();
2306 return true;
2309 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2310 putting the final result in DEST. Append all statements but the last into
2311 VINFO. Return the last statement. */
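/* Illustrative only (an assumed expansion, not from the original sources):
   for AMNT == 3 the synthesized sequence doubles OP three times,

     t1 = op + op;        (op << 1)
     t2 = t1 + t1;        (op << 2)
     dest = t2 + t2;      (op << 3)

   with the statements for t1 and t2 appended to the pattern sequence
   and the final statement returned to the caller.  */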
2313 static gimple *
2314 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2315 stmt_vec_info vinfo)
2317 HOST_WIDE_INT i;
2318 tree itype = TREE_TYPE (op);
2319 tree prev_res = op;
2320 gcc_assert (amnt >= 0);
2321 for (i = 0; i < amnt; i++)
2323 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2324 : dest;
2325 gimple *stmt
2326 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2327 prev_res = tmp_var;
2328 if (i < amnt - 1)
2329 append_pattern_def_seq (vinfo, stmt);
2330 else
2331 return stmt;
2333 gcc_unreachable ();
2334 return NULL;
2337 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2338 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2339 the process if necessary. Append the resulting assignment statements
2340 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2341 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2342 left shifts using additions. */
2344 static tree
2345 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2346 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2348 if (integer_zerop (op2)
2349 && (code == LSHIFT_EXPR
2350 || code == PLUS_EXPR))
2352 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2353 return op1;
2356 gimple *stmt;
2357 tree itype = TREE_TYPE (op1);
2358 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2360 if (code == LSHIFT_EXPR
2361 && synth_shift_p)
2363 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2364 stmt_vinfo);
2365 append_pattern_def_seq (stmt_vinfo, stmt);
2366 return tmp_var;
2369 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2370 append_pattern_def_seq (stmt_vinfo, stmt);
2371 return tmp_var;
2374 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2375 and simple arithmetic operations to be vectorized. Record the statements
2376 produced in STMT_VINFO and return the last statement in the sequence or
2377 NULL if it's not possible to synthesize such a multiplication.
2378 This function mirrors the behavior of expand_mult_const in expmed.c but
2379 works on tree-ssa form. */
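/* Illustrative only (one possible decomposition, not taken from the
   original sources): multiplying by the constant 10 can be synthesized
   without a vector multiply as

     t1 = op << 2;        (op * 4)
     t2 = t1 + op;        (op * 5)
     res = t2 << 1;       (op * 10)

   whether this exact sequence is chosen depends on choose_mult_variant
   and on which vector shifts and additions the target supports.  */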
2381 static gimple *
2382 vect_synth_mult_by_constant (tree op, tree val,
2383 stmt_vec_info stmt_vinfo)
2385 tree itype = TREE_TYPE (op);
2386 machine_mode mode = TYPE_MODE (itype);
2387 struct algorithm alg;
2388 mult_variant variant;
2389 if (!tree_fits_shwi_p (val))
2390 return NULL;
2392 /* Multiplication synthesis by shifts, adds and subs can introduce
2393 signed overflow where the original operation didn't. Perform the
2394 operations on an unsigned type and cast back to avoid this.
2395 In the future we may want to relax this for synthesis algorithms
2396 that we can prove do not cause unexpected overflow. */
2397 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2399 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2401 /* Targets that don't support vector shifts but support vector additions
2402 can synthesize shifts that way. */
2403 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2405 HOST_WIDE_INT hwval = tree_to_shwi (val);
2406 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2407 The vectorizer's benefit analysis will decide whether it's beneficial
2408 to do this. */
2409 bool possible = choose_mult_variant (mode, hwval, &alg,
2410 &variant, MAX_COST);
2411 if (!possible)
2412 return NULL;
2414 tree vectype = get_vectype_for_scalar_type (multtype);
2416 if (!vectype
2417 || !target_supports_mult_synth_alg (&alg, variant,
2418 vectype, synth_shift_p))
2419 return NULL;
2421 tree accumulator;
2423 /* Clear out the sequence of statements so we can populate it below. */
2424 gimple *stmt = NULL;
2426 if (cast_to_unsigned_p)
2428 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2429 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2430 append_pattern_def_seq (stmt_vinfo, stmt);
2431 op = tmp_op;
2434 if (alg.op[0] == alg_zero)
2435 accumulator = build_int_cst (multtype, 0);
2436 else
2437 accumulator = op;
2439 bool needs_fixup = (variant == negate_variant)
2440 || (variant == add_variant);
2442 for (int i = 1; i < alg.ops; i++)
2444 tree shft_log = build_int_cst (multtype, alg.log[i]);
2445 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2446 tree tmp_var = NULL_TREE;
2448 switch (alg.op[i])
2450 case alg_shift:
2451 if (synth_shift_p)
2452 stmt
2453 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2454 stmt_vinfo);
2455 else
2456 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2457 shft_log);
2458 break;
2459 case alg_add_t_m2:
2460 tmp_var
2461 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2462 stmt_vinfo, synth_shift_p);
2463 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2464 tmp_var);
2465 break;
2466 case alg_sub_t_m2:
2467 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2468 shft_log, stmt_vinfo,
2469 synth_shift_p);
2470 /* In some algorithms the first step involves zeroing the
2471 accumulator. If subtracting from such an accumulator
2472 just emit the negation directly. */
2473 if (integer_zerop (accumulator))
2474 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2475 else
2476 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2477 tmp_var);
2478 break;
2479 case alg_add_t2_m:
2480 tmp_var
2481 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2482 stmt_vinfo, synth_shift_p);
2483 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2484 break;
2485 case alg_sub_t2_m:
2486 tmp_var
2487 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2488 stmt_vinfo, synth_shift_p);
2489 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2490 break;
2491 case alg_add_factor:
2492 tmp_var
2493 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2494 stmt_vinfo, synth_shift_p);
2495 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2496 tmp_var);
2497 break;
2498 case alg_sub_factor:
2499 tmp_var
2500 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2501 stmt_vinfo, synth_shift_p);
2502 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2503 accumulator);
2504 break;
2505 default:
2506 gcc_unreachable ();
2508 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2509 but rather return it directly. */
2511 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2512 append_pattern_def_seq (stmt_vinfo, stmt);
2513 accumulator = accum_tmp;
2515 if (variant == negate_variant)
2517 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2518 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2519 accumulator = accum_tmp;
2520 if (cast_to_unsigned_p)
2521 append_pattern_def_seq (stmt_vinfo, stmt);
2523 else if (variant == add_variant)
2525 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2526 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2527 accumulator = accum_tmp;
2528 if (cast_to_unsigned_p)
2529 append_pattern_def_seq (stmt_vinfo, stmt);
2531 /* Move back to a signed if needed. */
2532 if (cast_to_unsigned_p)
2534 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2535 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2538 return stmt;
2541 /* Detect multiplication by a constant and convert it into a sequence of
2542 shifts, additions, subtractions and negations. We reuse the
2543 choose_mult_variant algorithms from expmed.c.
2545 Input/Output:
2547 STMT_VINFO: The stmt from which the pattern search begins,
2548 i.e. the mult stmt.
2550 Output:
2552 * TYPE_OUT: The type of the output of this pattern.
2554 * Return value: A new stmt that will be used to replace
2555 the multiplication. */
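/* Illustrative only (an assumed example, not from the original sources):

     void
     scale (int *x, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = x[i] * 7;
     }

   on a target without a suitable vector multiply, the multiplication by
   7 may be rewritten as (x[i] << 3) - x[i], using only vector shifts
   (or chains of additions) and subtractions.  */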
2557 static gimple *
2558 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2560 gimple *last_stmt = stmt_vinfo->stmt;
2561 tree oprnd0, oprnd1, vectype, itype;
2562 gimple *pattern_stmt;
2564 if (!is_gimple_assign (last_stmt))
2565 return NULL;
2567 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2568 return NULL;
2570 oprnd0 = gimple_assign_rhs1 (last_stmt);
2571 oprnd1 = gimple_assign_rhs2 (last_stmt);
2572 itype = TREE_TYPE (oprnd0);
2574 if (TREE_CODE (oprnd0) != SSA_NAME
2575 || TREE_CODE (oprnd1) != INTEGER_CST
2576 || !INTEGRAL_TYPE_P (itype)
2577 || !type_has_mode_precision_p (itype))
2578 return NULL;
2580 vectype = get_vectype_for_scalar_type (itype);
2581 if (vectype == NULL_TREE)
2582 return NULL;
2584 /* If the target can handle vectorized multiplication natively,
2585 don't attempt to optimize this. */
2586 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2587 if (mul_optab != unknown_optab)
2589 machine_mode vec_mode = TYPE_MODE (vectype);
2590 int icode = (int) optab_handler (mul_optab, vec_mode);
2591 if (icode != CODE_FOR_nothing)
2592 return NULL;
2595 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2596 if (!pattern_stmt)
2597 return NULL;
2599 /* Pattern detected. */
2600 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2602 *type_out = vectype;
2604 return pattern_stmt;
2607 /* Detect a signed division by a constant that wouldn't be
2608 otherwise vectorized:
2610 type a_t, b_t;
2612 S1 a_t = b_t / N;
2614 where type 'type' is an integral type and N is a constant.
2616 Similarly handle modulo by a constant:
2618 S4 a_t = b_t % N;
2620 Input/Output:
2622 * STMT_VINFO: The stmt from which the pattern search begins,
2623 i.e. the division stmt. If N is a power of two constant and the
2624 type is signed, S1 is replaced by:
2625 S3 y_t = b_t < 0 ? N - 1 : 0;
2626 S2 x_t = b_t + y_t;
2627 S1' a_t = x_t >> log2 (N);
2629 If N is a power of two constant and the type is signed, S4 is
2630 replaced by (where *_T temporaries have unsigned type):
2631 S9 y_T = b_t < 0 ? -1U : 0U;
2632 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2633 S7 z_t = (type) z_T;
2634 S6 w_t = b_t + z_t;
2635 S5 x_t = w_t & (N - 1);
2636 S4' a_t = x_t - z_t;
2638 Output:
2640 * TYPE_OUT: The type of the output of this pattern.
2642 * Return value: A new stmt that will be used to replace the division
2643 S1 or modulo S4 stmt. */
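/* Illustrative only (an assumed example, not from the original sources):

     void
     div_by_4 (int *x, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = x[i] / 4;
     }

   follows the power-of-two path above: each element becomes
   (x[i] + (x[i] < 0 ? 3 : 0)) >> 2, which rounds towards zero for
   negative values exactly as signed integer division requires.  */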
2645 static gimple *
2646 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2648 gimple *last_stmt = stmt_vinfo->stmt;
2649 tree oprnd0, oprnd1, vectype, itype, cond;
2650 gimple *pattern_stmt, *def_stmt;
2651 enum tree_code rhs_code;
2652 optab optab;
2653 tree q;
2654 int dummy_int, prec;
2656 if (!is_gimple_assign (last_stmt))
2657 return NULL;
2659 rhs_code = gimple_assign_rhs_code (last_stmt);
2660 switch (rhs_code)
2662 case TRUNC_DIV_EXPR:
2663 case EXACT_DIV_EXPR:
2664 case TRUNC_MOD_EXPR:
2665 break;
2666 default:
2667 return NULL;
2670 oprnd0 = gimple_assign_rhs1 (last_stmt);
2671 oprnd1 = gimple_assign_rhs2 (last_stmt);
2672 itype = TREE_TYPE (oprnd0);
2673 if (TREE_CODE (oprnd0) != SSA_NAME
2674 || TREE_CODE (oprnd1) != INTEGER_CST
2675 || TREE_CODE (itype) != INTEGER_TYPE
2676 || !type_has_mode_precision_p (itype))
2677 return NULL;
2679 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2680 vectype = get_vectype_for_scalar_type (itype);
2681 if (vectype == NULL_TREE)
2682 return NULL;
2684 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2686 /* If the target can handle vectorized division or modulo natively,
2687 don't attempt to optimize this, since native division is likely
2688 to give smaller code. */
2689 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2690 if (optab != unknown_optab)
2692 machine_mode vec_mode = TYPE_MODE (vectype);
2693 int icode = (int) optab_handler (optab, vec_mode);
2694 if (icode != CODE_FOR_nothing)
2695 return NULL;
2699 prec = TYPE_PRECISION (itype);
2700 if (integer_pow2p (oprnd1))
2702 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2703 return NULL;
2705 /* Pattern detected. */
2706 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2708 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2709 build_int_cst (itype, 0));
2710 if (rhs_code == TRUNC_DIV_EXPR
2711 || rhs_code == EXACT_DIV_EXPR)
2713 tree var = vect_recog_temp_ssa_var (itype, NULL);
2714 tree shift;
2715 def_stmt
2716 = gimple_build_assign (var, COND_EXPR, cond,
2717 fold_build2 (MINUS_EXPR, itype, oprnd1,
2718 build_int_cst (itype, 1)),
2719 build_int_cst (itype, 0));
2720 append_pattern_def_seq (stmt_vinfo, def_stmt);
2721 var = vect_recog_temp_ssa_var (itype, NULL);
2722 def_stmt
2723 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2724 gimple_assign_lhs (def_stmt));
2725 append_pattern_def_seq (stmt_vinfo, def_stmt);
2727 shift = build_int_cst (itype, tree_log2 (oprnd1));
2728 pattern_stmt
2729 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2730 RSHIFT_EXPR, var, shift);
2732 else
2734 tree signmask;
2735 if (compare_tree_int (oprnd1, 2) == 0)
2737 signmask = vect_recog_temp_ssa_var (itype, NULL);
2738 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2739 build_int_cst (itype, 1),
2740 build_int_cst (itype, 0));
2741 append_pattern_def_seq (stmt_vinfo, def_stmt);
2743 else
2745 tree utype
2746 = build_nonstandard_integer_type (prec, 1);
2747 tree vecutype = get_vectype_for_scalar_type (utype);
2748 tree shift
2749 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2750 - tree_log2 (oprnd1));
2751 tree var = vect_recog_temp_ssa_var (utype, NULL);
2753 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2754 build_int_cst (utype, -1),
2755 build_int_cst (utype, 0));
2756 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2757 var = vect_recog_temp_ssa_var (utype, NULL);
2758 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2759 gimple_assign_lhs (def_stmt),
2760 shift);
2761 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2762 signmask = vect_recog_temp_ssa_var (itype, NULL);
2763 def_stmt
2764 = gimple_build_assign (signmask, NOP_EXPR, var);
2765 append_pattern_def_seq (stmt_vinfo, def_stmt);
2767 def_stmt
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2769 PLUS_EXPR, oprnd0, signmask);
2770 append_pattern_def_seq (stmt_vinfo, def_stmt);
2771 def_stmt
2772 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2773 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2774 fold_build2 (MINUS_EXPR, itype, oprnd1,
2775 build_int_cst (itype, 1)));
2776 append_pattern_def_seq (stmt_vinfo, def_stmt);
2778 pattern_stmt
2779 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2780 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2781 signmask);
2784 *type_out = vectype;
2785 return pattern_stmt;
2788 if (prec > HOST_BITS_PER_WIDE_INT
2789 || integer_zerop (oprnd1))
2790 return NULL;
2792 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2793 return NULL;
2795 if (TYPE_UNSIGNED (itype))
2797 unsigned HOST_WIDE_INT mh, ml;
2798 int pre_shift, post_shift;
2799 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2800 & GET_MODE_MASK (itype_mode));
2801 tree t1, t2, t3, t4;
2803 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2804 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2805 return NULL;
2807 /* Find a suitable multiplier and right shift count
2808 instead of multiplying with D. */
2809 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2811 /* If the suggested multiplier is more than SIZE bits, we can do better
2812 for even divisors, using an initial right shift. */
2813 if (mh != 0 && (d & 1) == 0)
2815 pre_shift = ctz_or_zero (d);
2816 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2817 &ml, &post_shift, &dummy_int);
2818 gcc_assert (!mh);
2820 else
2821 pre_shift = 0;
2823 if (mh != 0)
2825 if (post_shift - 1 >= prec)
2826 return NULL;
2828 /* t1 = oprnd0 h* ml;
2829 t2 = oprnd0 - t1;
2830 t3 = t2 >> 1;
2831 t4 = t1 + t3;
2832 q = t4 >> (post_shift - 1); */
2833 t1 = vect_recog_temp_ssa_var (itype, NULL);
2834 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2835 build_int_cst (itype, ml));
2836 append_pattern_def_seq (stmt_vinfo, def_stmt);
2838 t2 = vect_recog_temp_ssa_var (itype, NULL);
2839 def_stmt
2840 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2841 append_pattern_def_seq (stmt_vinfo, def_stmt);
2843 t3 = vect_recog_temp_ssa_var (itype, NULL);
2844 def_stmt
2845 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2846 append_pattern_def_seq (stmt_vinfo, def_stmt);
2848 t4 = vect_recog_temp_ssa_var (itype, NULL);
2849 def_stmt
2850 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2852 if (post_shift != 1)
2854 append_pattern_def_seq (stmt_vinfo, def_stmt);
2856 q = vect_recog_temp_ssa_var (itype, NULL);
2857 pattern_stmt
2858 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2859 build_int_cst (itype, post_shift - 1));
2861 else
2863 q = t4;
2864 pattern_stmt = def_stmt;
2867 else
2869 if (pre_shift >= prec || post_shift >= prec)
2870 return NULL;
2872 /* t1 = oprnd0 >> pre_shift;
2873 t2 = t1 h* ml;
2874 q = t2 >> post_shift; */
2875 if (pre_shift)
2877 t1 = vect_recog_temp_ssa_var (itype, NULL);
2878 def_stmt
2879 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2880 build_int_cst (NULL, pre_shift));
2881 append_pattern_def_seq (stmt_vinfo, def_stmt);
2883 else
2884 t1 = oprnd0;
2886 t2 = vect_recog_temp_ssa_var (itype, NULL);
2887 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2888 build_int_cst (itype, ml));
2890 if (post_shift)
2892 append_pattern_def_seq (stmt_vinfo, def_stmt);
2894 q = vect_recog_temp_ssa_var (itype, NULL);
2895 def_stmt
2896 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2897 build_int_cst (itype, post_shift));
2899 else
2900 q = t2;
2902 pattern_stmt = def_stmt;
2905 else
2907 unsigned HOST_WIDE_INT ml;
2908 int post_shift;
2909 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2910 unsigned HOST_WIDE_INT abs_d;
2911 bool add = false;
2912 tree t1, t2, t3, t4;
2914 /* Give up for -1. */
2915 if (d == -1)
2916 return NULL;
2918 /* Since d might be INT_MIN, we have to cast to
2919 unsigned HOST_WIDE_INT before negating to avoid
2920 undefined signed overflow. */
2921 abs_d = (d >= 0
2922 ? (unsigned HOST_WIDE_INT) d
2923 : - (unsigned HOST_WIDE_INT) d);
2925 /* n rem d = n rem -d */
2926 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2928 d = abs_d;
2929 oprnd1 = build_int_cst (itype, abs_d);
2931 else if (HOST_BITS_PER_WIDE_INT >= prec
2932 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2933 /* This case is not handled correctly below. */
2934 return NULL;
2936 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2937 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2939 add = true;
2940 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2942 if (post_shift >= prec)
2943 return NULL;
2945 /* t1 = oprnd0 h* ml; */
2946 t1 = vect_recog_temp_ssa_var (itype, NULL);
2947 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2948 build_int_cst (itype, ml));
2950 if (add)
2952 /* t2 = t1 + oprnd0; */
2953 append_pattern_def_seq (stmt_vinfo, def_stmt);
2954 t2 = vect_recog_temp_ssa_var (itype, NULL);
2955 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2957 else
2958 t2 = t1;
2960 if (post_shift)
2962 /* t3 = t2 >> post_shift; */
2963 append_pattern_def_seq (stmt_vinfo, def_stmt);
2964 t3 = vect_recog_temp_ssa_var (itype, NULL);
2965 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2966 build_int_cst (itype, post_shift));
2968 else
2969 t3 = t2;
2971 wide_int oprnd0_min, oprnd0_max;
2972 int msb = 1;
2973 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2975 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2976 msb = 0;
2977 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2978 msb = -1;
2981 if (msb == 0 && d >= 0)
2983 /* q = t3; */
2984 q = t3;
2985 pattern_stmt = def_stmt;
2987 else
2989 /* t4 = oprnd0 >> (prec - 1);
2990 or if we know from VRP that oprnd0 >= 0
2991 t4 = 0;
2992 or if we know from VRP that oprnd0 < 0
2993 t4 = -1; */
2994 append_pattern_def_seq (stmt_vinfo, def_stmt);
2995 t4 = vect_recog_temp_ssa_var (itype, NULL);
2996 if (msb != 1)
2997 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2998 build_int_cst (itype, msb));
2999 else
3000 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3001 build_int_cst (itype, prec - 1));
3002 append_pattern_def_seq (stmt_vinfo, def_stmt);
3004 /* q = t3 - t4; or q = t4 - t3; */
3005 q = vect_recog_temp_ssa_var (itype, NULL);
3006 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3007 d < 0 ? t3 : t4);
3011 if (rhs_code == TRUNC_MOD_EXPR)
3013 tree r, t1;
3015 /* We divided. Now finish by:
3016 t1 = q * oprnd1;
3017 r = oprnd0 - t1; */
3018 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3020 t1 = vect_recog_temp_ssa_var (itype, NULL);
3021 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3022 append_pattern_def_seq (stmt_vinfo, def_stmt);
3024 r = vect_recog_temp_ssa_var (itype, NULL);
3025 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3028 /* Pattern detected. */
3029 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3031 *type_out = vectype;
3032 return pattern_stmt;
3035 /* Function vect_recog_mixed_size_cond_pattern
3037 Try to find the following pattern:
3039 type x_t, y_t;
3040 TYPE a_T, b_T, c_T;
3041 loop:
3042 S1 a_T = x_t CMP y_t ? b_T : c_T;
3044 where type 'TYPE' is an integral type which has different size
3045 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3046 than 'type', the constants need to fit into an integer type
3047 with the same width as 'type') or results of conversion from 'type'.
3049 Input:
3051 * STMT_VINFO: The stmt from which the pattern search begins.
3053 Output:
3055 * TYPE_OUT: The type of the output of this pattern.
3057 * Return value: A new stmt that will be used to replace the pattern.
3058 Additionally a def_stmt is added.
3060 a_it = x_t CMP y_t ? b_it : c_it;
3061 a_T = (TYPE) a_it; */
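/* Illustrative only (an assumed example, not from the original sources):

     void
     sel (int *restrict out, short *restrict a, short *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = a[i] < b[i] ? -1 : 16;
     }

   compares shorts but selects ints.  The pattern performs the COND_EXPR
   in a short-sized integer type (the constants fit) and widens the
   selected value afterwards, so the comparison and the selection can
   use vectors with the same number of elements.  */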
3063 static gimple *
3064 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3066 gimple *last_stmt = stmt_vinfo->stmt;
3067 tree cond_expr, then_clause, else_clause;
3068 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3069 gimple *pattern_stmt, *def_stmt;
3070 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3071 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3072 bool promotion;
3073 tree comp_scalar_type;
3075 if (!is_gimple_assign (last_stmt)
3076 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3077 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3078 return NULL;
3080 cond_expr = gimple_assign_rhs1 (last_stmt);
3081 then_clause = gimple_assign_rhs2 (last_stmt);
3082 else_clause = gimple_assign_rhs3 (last_stmt);
3084 if (!COMPARISON_CLASS_P (cond_expr))
3085 return NULL;
3087 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3088 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3089 if (comp_vectype == NULL_TREE)
3090 return NULL;
3092 type = gimple_expr_type (last_stmt);
3093 if (types_compatible_p (type, comp_scalar_type)
3094 || ((TREE_CODE (then_clause) != INTEGER_CST
3095 || TREE_CODE (else_clause) != INTEGER_CST)
3096 && !INTEGRAL_TYPE_P (comp_scalar_type))
3097 || !INTEGRAL_TYPE_P (type))
3098 return NULL;
3100 if ((TREE_CODE (then_clause) != INTEGER_CST
3101 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3102 &def_stmt0, &promotion))
3103 || (TREE_CODE (else_clause) != INTEGER_CST
3104 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3105 &def_stmt1, &promotion)))
3106 return NULL;
3108 if (orig_type0 && orig_type1
3109 && !types_compatible_p (orig_type0, orig_type1))
3110 return NULL;
3112 if (orig_type0)
3114 if (!types_compatible_p (orig_type0, comp_scalar_type))
3115 return NULL;
3116 then_clause = gimple_assign_rhs1 (def_stmt0);
3117 itype = orig_type0;
3120 if (orig_type1)
3122 if (!types_compatible_p (orig_type1, comp_scalar_type))
3123 return NULL;
3124 else_clause = gimple_assign_rhs1 (def_stmt1);
3125 itype = orig_type1;
3129 HOST_WIDE_INT cmp_mode_size
3130 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3132 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3133 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3134 return NULL;
3136 vectype = get_vectype_for_scalar_type (type);
3137 if (vectype == NULL_TREE)
3138 return NULL;
3140 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3141 return NULL;
3143 if (itype == NULL_TREE)
3144 itype = build_nonstandard_integer_type (cmp_mode_size,
3145 TYPE_UNSIGNED (type));
3147 if (itype == NULL_TREE
3148 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3149 return NULL;
3151 vecitype = get_vectype_for_scalar_type (itype);
3152 if (vecitype == NULL_TREE)
3153 return NULL;
3155 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3156 return NULL;
3158 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3160 if ((TREE_CODE (then_clause) == INTEGER_CST
3161 && !int_fits_type_p (then_clause, itype))
3162 || (TREE_CODE (else_clause) == INTEGER_CST
3163 && !int_fits_type_p (else_clause, itype)))
3164 return NULL;
3167 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 COND_EXPR, unshare_expr (cond_expr),
3169 fold_convert (itype, then_clause),
3170 fold_convert (itype, else_clause));
3171 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3172 NOP_EXPR, gimple_assign_lhs (def_stmt));
3174 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3175 *type_out = vectype;
3177 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3179 return pattern_stmt;
3183 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3184 true if bool VAR can and should be optimized that way. Assume it shouldn't
3185 in case it's a result of a comparison which can be directly vectorized into
3186 a vector comparison. Fills in STMTS with all stmts visited during the
3187 walk. */
3189 static bool
3190 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3192 tree rhs1;
3193 enum tree_code rhs_code;
3195 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3196 if (!def_stmt_info)
3197 return false;
3199 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3200 if (!def_stmt)
3201 return false;
3203 if (stmts.contains (def_stmt))
3204 return true;
3206 rhs1 = gimple_assign_rhs1 (def_stmt);
3207 rhs_code = gimple_assign_rhs_code (def_stmt);
3208 switch (rhs_code)
3210 case SSA_NAME:
3211 if (! check_bool_pattern (rhs1, vinfo, stmts))
3212 return false;
3213 break;
3215 CASE_CONVERT:
3216 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3217 return false;
3218 if (! check_bool_pattern (rhs1, vinfo, stmts))
3219 return false;
3220 break;
3222 case BIT_NOT_EXPR:
3223 if (! check_bool_pattern (rhs1, vinfo, stmts))
3224 return false;
3225 break;
3227 case BIT_AND_EXPR:
3228 case BIT_IOR_EXPR:
3229 case BIT_XOR_EXPR:
3230 if (! check_bool_pattern (rhs1, vinfo, stmts)
3231 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3232 return false;
3233 break;
3235 default:
3236 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3238 tree vecitype, comp_vectype;
3240 /* If the comparison can throw, then is_gimple_condexpr will be
3241 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3242 if (stmt_could_throw_p (cfun, def_stmt))
3243 return false;
3245 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3246 if (comp_vectype == NULL_TREE)
3247 return false;
3249 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3250 if (mask_type
3251 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3252 return false;
3254 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3256 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3257 tree itype
3258 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3259 vecitype = get_vectype_for_scalar_type (itype);
3260 if (vecitype == NULL_TREE)
3261 return false;
3263 else
3264 vecitype = comp_vectype;
3265 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3266 return false;
3268 else
3269 return false;
3270 break;
3273 bool res = stmts.add (def_stmt);
3274 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3275 gcc_assert (!res);
3277 return true;
3281 /* Helper function of adjust_bool_pattern. Cast VAR to TYPE, appending
3282 the cast stmt to STMT_INFO's pattern definition sequence, and return
3283 the result of the cast. */
3285 static tree
3286 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3288 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3289 NOP_EXPR, var);
3290 append_pattern_def_seq (stmt_info, cast_stmt,
3291 get_vectype_for_scalar_type (type));
3292 return gimple_assign_lhs (cast_stmt);
3295 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3296 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3297 type, OUT_TYPE is the desired final integer type of the whole pattern.
3298 STMT_INFO is the info of the pattern root, with which the pattern stmts
3299 should be associated. DEFS is a map of pattern defs.
3301 static void
3302 adjust_bool_pattern (tree var, tree out_type,
3303 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3305 gimple *stmt = SSA_NAME_DEF_STMT (var);
3306 enum tree_code rhs_code, def_rhs_code;
3307 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3308 location_t loc;
3309 gimple *pattern_stmt, *def_stmt;
3310 tree trueval = NULL_TREE;
3312 rhs1 = gimple_assign_rhs1 (stmt);
3313 rhs2 = gimple_assign_rhs2 (stmt);
3314 rhs_code = gimple_assign_rhs_code (stmt);
3315 loc = gimple_location (stmt);
3316 switch (rhs_code)
3318 case SSA_NAME:
3319 CASE_CONVERT:
3320 irhs1 = *defs.get (rhs1);
3321 itype = TREE_TYPE (irhs1);
3322 pattern_stmt
3323 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3324 SSA_NAME, irhs1);
3325 break;
3327 case BIT_NOT_EXPR:
3328 irhs1 = *defs.get (rhs1);
3329 itype = TREE_TYPE (irhs1);
3330 pattern_stmt
3331 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3332 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3333 break;
3335 case BIT_AND_EXPR:
3336 /* Try to optimize x = y & (a < b ? 1 : 0); into
3337 x = (a < b ? y : 0);
3339 E.g. for:
3340 bool a_b, b_b, c_b;
3341 TYPE d_T;
3343 S1 a_b = x1 CMP1 y1;
3344 S2 b_b = x2 CMP2 y2;
3345 S3 c_b = a_b & b_b;
3346 S4 d_T = (TYPE) c_b;
3348 we would normally emit:
3350 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3351 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3352 S3' c_T = a_T & b_T;
3353 S4' d_T = c_T;
3355 but we can save one stmt by using the
3356 result of one of the COND_EXPRs in the other COND_EXPR and leave
3357 BIT_AND_EXPR stmt out:
3359 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3360 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3361 S4' f_T = c_T;
3363 At least when VEC_COND_EXPR is implemented using masks,
3364 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3365 computes the comparison mask and ANDs it, in one case with an
3366 all-ones vector, in the other case with a vector register.
3367 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3368 often more expensive. */
3369 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3370 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3371 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3373 irhs1 = *defs.get (rhs1);
3374 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3375 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3376 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3378 rhs_code = def_rhs_code;
3379 rhs1 = def_rhs1;
3380 rhs2 = gimple_assign_rhs2 (def_stmt);
3381 trueval = irhs1;
3382 goto do_compare;
3384 else
3385 irhs2 = *defs.get (rhs2);
3386 goto and_ior_xor;
3388 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3389 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3390 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3392 irhs2 = *defs.get (rhs2);
3393 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3394 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3395 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3397 rhs_code = def_rhs_code;
3398 rhs1 = def_rhs1;
3399 rhs2 = gimple_assign_rhs2 (def_stmt);
3400 trueval = irhs2;
3401 goto do_compare;
3403 else
3404 irhs1 = *defs.get (rhs1);
3405 goto and_ior_xor;
3407 /* FALLTHRU */
3408 case BIT_IOR_EXPR:
3409 case BIT_XOR_EXPR:
3410 irhs1 = *defs.get (rhs1);
3411 irhs2 = *defs.get (rhs2);
3412 and_ior_xor:
3413 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3414 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3416 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3417 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3418 int out_prec = TYPE_PRECISION (out_type);
3419 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3420 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3421 stmt_info);
3422 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3423 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3424 stmt_info);
3425 else
3427 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3428 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3431 itype = TREE_TYPE (irhs1);
3432 pattern_stmt
3433 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3434 rhs_code, irhs1, irhs2);
3435 break;
3437 default:
3438 do_compare:
3439 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3440 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3441 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3442 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3443 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3445 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3446 itype
3447 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3449 else
3450 itype = TREE_TYPE (rhs1);
3451 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3452 if (trueval == NULL_TREE)
3453 trueval = build_int_cst (itype, 1);
3454 else
3455 gcc_checking_assert (useless_type_conversion_p (itype,
3456 TREE_TYPE (trueval)));
3457 pattern_stmt
3458 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3459 COND_EXPR, cond_expr, trueval,
3460 build_int_cst (itype, 0));
3461 break;
3464 gimple_set_location (pattern_stmt, loc);
3465 append_pattern_def_seq (stmt_info, pattern_stmt,
3466 get_vectype_for_scalar_type (itype));
3467 defs.put (var, gimple_assign_lhs (pattern_stmt));
3470 /* Comparison function to qsort a vector of gimple stmts by UID. */
3472 static int
3473 sort_after_uid (const void *p1, const void *p2)
3475 const gimple *stmt1 = *(const gimple * const *)p1;
3476 const gimple *stmt2 = *(const gimple * const *)p2;
3477 return gimple_uid (stmt1) - gimple_uid (stmt2);
3480 /* Create pattern stmts for all stmts participating in the bool pattern
3481 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3482 OUT_TYPE. Return the def of the pattern root. */
3484 static tree
3485 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3486 tree out_type, stmt_vec_info stmt_info)
3488 /* Gather original stmts in the bool pattern in their order of appearance
3489 in the IL. */
3490 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3491 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3492 i != bool_stmt_set.end (); ++i)
3493 bool_stmts.quick_push (*i);
3494 bool_stmts.qsort (sort_after_uid);
3496 /* Now process them in that order, producing pattern stmts. */
3497 hash_map <tree, tree> defs;
3498 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3499 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3500 out_type, stmt_info, defs);
3502 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3503 gimple *pattern_stmt
3504 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3505 return gimple_assign_lhs (pattern_stmt);
3508 /* Helper for search_type_for_mask. */
3510 static tree
3511 search_type_for_mask_1 (tree var, vec_info *vinfo,
3512 hash_map<gimple *, tree> &cache)
3514 tree rhs1;
3515 enum tree_code rhs_code;
3516 tree res = NULL_TREE, res2;
3518 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3519 return NULL_TREE;
3521 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3522 if (!def_stmt_info)
3523 return NULL_TREE;
3525 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3526 if (!def_stmt)
3527 return NULL_TREE;
3529 tree *c = cache.get (def_stmt);
3530 if (c)
3531 return *c;
3533 rhs_code = gimple_assign_rhs_code (def_stmt);
3534 rhs1 = gimple_assign_rhs1 (def_stmt);
3536 switch (rhs_code)
3538 case SSA_NAME:
3539 case BIT_NOT_EXPR:
3540 CASE_CONVERT:
3541 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3542 break;
3544 case BIT_AND_EXPR:
3545 case BIT_IOR_EXPR:
3546 case BIT_XOR_EXPR:
3547 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3548 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3549 cache);
3550 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3551 res = res2;
3552 break;
3554 default:
3555 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3557 tree comp_vectype, mask_type;
3559 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3561 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3562 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3563 vinfo, cache);
3564 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3565 res = res2;
3566 break;
3569 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3570 if (comp_vectype == NULL_TREE)
3572 res = NULL_TREE;
3573 break;
3576 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3577 if (!mask_type
3578 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3580 res = NULL_TREE;
3581 break;
3584 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3585 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3587 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3588 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3590 else
3591 res = TREE_TYPE (rhs1);
3595 cache.put (def_stmt, res);
3596 return res;
3599 /* Return the proper type for converting bool VAR into
3600 an integer value or NULL_TREE if no such type exists.
3601 The type is chosen so that the converted value has the
3602 same number of elements as VAR's vector type. */
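/* For example (illustrative, assuming 32-bit int): if VAR is ultimately
   produced by comparisons of ints, such as _1 = a_2 > b_3, the type
   returned is a 32-bit unsigned integer type, so that a vector of the
   converted values has as many elements as a vector of the compared
   ints.  */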
3604 static tree
3605 search_type_for_mask (tree var, vec_info *vinfo)
3607 hash_map<gimple *, tree> cache;
3608 return search_type_for_mask_1 (var, vinfo, cache);
3611 /* Function vect_recog_bool_pattern
3613 Try to find a pattern like the following:
3615 bool a_b, b_b, c_b, d_b, e_b;
3616 TYPE f_T;
3617 loop:
3618 S1 a_b = x1 CMP1 y1;
3619 S2 b_b = x2 CMP2 y2;
3620 S3 c_b = a_b & b_b;
3621 S4 d_b = x3 CMP3 y3;
3622 S5 e_b = c_b | d_b;
3623 S6 f_T = (TYPE) e_b;
3625 where type 'TYPE' is an integral type. Or a similar pattern
3626 ending in
3628 S6 f_Y = e_b ? r_Y : s_Y;
3630 as results from if-conversion of a complex condition.
3632 Input:
3634 * STMT_VINFO: The stmt at the end from which the pattern
3635 search begins, i.e. cast of a bool to
3636 an integer type.
3638 Output:
3640 * TYPE_OUT: The type of the output of this pattern.
3642 * Return value: A new stmt that will be used to replace the pattern.
3644 Assuming the size of TYPE is the same as the size of all comparisons
3645 (otherwise some casts would be added where needed), for the above
3646 sequence we create the following related pattern stmts:
3647 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3648 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3649 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3650 S5' e_T = c_T | d_T;
3651 S6' f_T = e_T;
3653 Instead of the above S3' we could emit:
3654 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3655 S3' c_T = a_T | b_T;
3656 but the above is more efficient. */
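/* A C-level sketch that could give rise to the sequence above; it is
   illustrative only (names are made up and the exact IL depends on
   earlier passes such as if-conversion):

     for (i = 0; i < n; i++)
       f[i] = (x1[i] < y1[i] & x2[i] < y2[i]) | (x3[i] < y3[i]);

   where the element type of "f" is an integral type wider than one bit.  */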
3658 static gimple *
3659 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3661 gimple *last_stmt = stmt_vinfo->stmt;
3662 enum tree_code rhs_code;
3663 tree var, lhs, rhs, vectype;
3664 vec_info *vinfo = stmt_vinfo->vinfo;
3665 gimple *pattern_stmt;
3667 if (!is_gimple_assign (last_stmt))
3668 return NULL;
3670 var = gimple_assign_rhs1 (last_stmt);
3671 lhs = gimple_assign_lhs (last_stmt);
3673 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3674 return NULL;
3676 hash_set<gimple *> bool_stmts;
3678 rhs_code = gimple_assign_rhs_code (last_stmt);
3679 if (CONVERT_EXPR_CODE_P (rhs_code))
3681 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3682 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3683 return NULL;
3684 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3685 if (vectype == NULL_TREE)
3686 return NULL;
3688 if (check_bool_pattern (var, vinfo, bool_stmts))
3690 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3691 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3692 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3693 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3694 else
3695 pattern_stmt
3696 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3698 else
3700 tree type = search_type_for_mask (var, vinfo);
3701 tree cst0, cst1, tmp;
3703 if (!type)
3704 return NULL;
3706 /* We may use the cond directly with the narrowed type, performing
3707 a single cond on a packed mask instead of multiple cond exprs
3708 followed by packing of the results. In the case of widening it
3709 is better to do the cond first and then extract the
3710 results. */
3711 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3712 type = TREE_TYPE (lhs);
3714 cst0 = build_int_cst (type, 0);
3715 cst1 = build_int_cst (type, 1);
3716 tmp = vect_recog_temp_ssa_var (type, NULL);
3717 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3719 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3721 tree new_vectype = get_vectype_for_scalar_type (type);
3722 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3724 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3725 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3729 *type_out = vectype;
3730 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3732 return pattern_stmt;
3734 else if (rhs_code == COND_EXPR
3735 && TREE_CODE (var) == SSA_NAME)
3737 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3738 if (vectype == NULL_TREE)
3739 return NULL;
3741 /* Build a scalar type for the boolean result that when
3742 vectorized matches the vector type of the result in
3743 size and number of elements. */
3744 unsigned prec
3745 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3746 TYPE_VECTOR_SUBPARTS (vectype));
3748 tree type
3749 = build_nonstandard_integer_type (prec,
3750 TYPE_UNSIGNED (TREE_TYPE (var)));
3751 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3752 return NULL;
3754 if (!check_bool_pattern (var, vinfo, bool_stmts))
3755 return NULL;
3757 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3759 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3760 pattern_stmt
3761 = gimple_build_assign (lhs, COND_EXPR,
3762 build2 (NE_EXPR, boolean_type_node,
3763 rhs, build_int_cst (type, 0)),
3764 gimple_assign_rhs2 (last_stmt),
3765 gimple_assign_rhs3 (last_stmt));
3766 *type_out = vectype;
3767 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3769 return pattern_stmt;
3771 else if (rhs_code == SSA_NAME
3772 && STMT_VINFO_DATA_REF (stmt_vinfo))
3774 stmt_vec_info pattern_stmt_info;
3775 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3776 gcc_assert (vectype != NULL_TREE);
3777 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3778 return NULL;
3780 if (check_bool_pattern (var, vinfo, bool_stmts))
3781 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3782 else
3784 tree type = search_type_for_mask (var, vinfo);
3785 tree cst0, cst1, new_vectype;
3787 if (!type)
3788 return NULL;
3790 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3791 type = TREE_TYPE (vectype);
3793 cst0 = build_int_cst (type, 0);
3794 cst1 = build_int_cst (type, 1);
3795 new_vectype = get_vectype_for_scalar_type (type);
3797 rhs = vect_recog_temp_ssa_var (type, NULL);
3798 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3799 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3802 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3803 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3805 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3806 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3807 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3808 rhs = rhs2;
3810 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3811 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3812 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3813 *type_out = vectype;
3814 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3816 return pattern_stmt;
3818 else
3819 return NULL;
3823 /* A helper for vect_recog_mask_conversion_pattern. Build a
3824 conversion of MASK to a type suitable for masking VECTYPE.
3825 The built statement gets the required vectype and is appended
3826 to the pattern sequence of STMT_VINFO.
3828 Return the converted mask. */
3830 static tree
3831 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3833 gimple *stmt;
3834 tree masktype, tmp;
3836 masktype = build_same_sized_truth_vector_type (vectype);
3837 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3838 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3839 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3841 return tmp;
3845 /* Function vect_recog_mask_conversion_pattern
3847 Try to find statements which require boolean type
3848 conversion. Additional conversion statements are
3849 added to handle such cases. For example:
3851 bool m_1, m_2, m_3;
3852 int i_4, i_5;
3853 double d_6, d_7;
3854 char c_1, c_2, c_3;
3856 S1 m_1 = i_4 > i_5;
3857 S2 m_2 = d_6 < d_7;
3858 S3 m_3 = m_1 & m_2;
3859 S4 c_1 = m_3 ? c_2 : c_3;
3861 Will be transformed into:
3863 S1 m_1 = i_4 > i_5;
3864 S2 m_2 = d_6 < d_7;
3865 S3'' m_2' = (_Bool[bitsize=32])m_2
3866 S3' m_3' = m_1 & m_2';
3867 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3868 S4' c_1' = m_3'' ? c_2 : c_3; */
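/* A C-level sketch that could lead to the IL above, assuming
   if-conversion of the select; it is illustrative only:

     for (i = 0; i < n; i++)
       c_1[i] = (i_4[i] > i_5[i] & d_6[i] < d_7[i]) ? c_2[i] : c_3[i];

   The int and double comparisons produce masks with different element
   widths, hence the conversions S3'' and S4'' above.  */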
3870 static gimple *
3871 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3873 gimple *last_stmt = stmt_vinfo->stmt;
3874 enum tree_code rhs_code;
3875 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3876 tree vectype1, vectype2;
3877 stmt_vec_info pattern_stmt_info;
3878 vec_info *vinfo = stmt_vinfo->vinfo;
3880 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3881 if (is_gimple_call (last_stmt)
3882 && gimple_call_internal_p (last_stmt))
3884 gcall *pattern_stmt;
3886 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3887 int mask_argno = internal_fn_mask_index (ifn);
3888 if (mask_argno < 0)
3889 return NULL;
3891 bool store_p = internal_store_fn_p (ifn);
3892 if (store_p)
3894 int rhs_index = internal_fn_stored_value_index (ifn);
3895 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3896 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3898 else
3900 lhs = gimple_call_lhs (last_stmt);
3901 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3904 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3905 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3906 if (!mask_arg_type)
3907 return NULL;
3908 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3910 if (!vectype1 || !vectype2
3911 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3912 TYPE_VECTOR_SUBPARTS (vectype2)))
3913 return NULL;
3915 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3917 auto_vec<tree, 8> args;
3918 unsigned int nargs = gimple_call_num_args (last_stmt);
3919 args.safe_grow (nargs);
3920 for (unsigned int i = 0; i < nargs; ++i)
3921 args[i] = ((int) i == mask_argno
3922 ? tmp
3923 : gimple_call_arg (last_stmt, i));
3924 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3926 if (!store_p)
3928 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3929 gimple_call_set_lhs (pattern_stmt, lhs);
3931 gimple_call_set_nothrow (pattern_stmt, true);
3933 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3934 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3935 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3937 *type_out = vectype1;
3938 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3940 return pattern_stmt;
3943 if (!is_gimple_assign (last_stmt))
3944 return NULL;
3946 gimple *pattern_stmt;
3947 lhs = gimple_assign_lhs (last_stmt);
3948 rhs1 = gimple_assign_rhs1 (last_stmt);
3949 rhs_code = gimple_assign_rhs_code (last_stmt);
3951 /* Check for cond expression requiring mask conversion. */
3952 if (rhs_code == COND_EXPR)
3954 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3956 if (TREE_CODE (rhs1) == SSA_NAME)
3958 rhs1_type = search_type_for_mask (rhs1, vinfo);
3959 if (!rhs1_type)
3960 return NULL;
3962 else if (COMPARISON_CLASS_P (rhs1))
3964 /* Check whether we're comparing scalar booleans and (if so)
3965 whether a better mask type exists than the mask associated
3966 with boolean-sized elements. This avoids unnecessary packs
3967 and unpacks if the booleans are set from comparisons of
3968 wider types. E.g. in:
3970 int x1, x2, x3, x4, y1, y2;
3972 bool b1 = (x1 == x2);
3973 bool b2 = (x3 == x4);
3974 ... = b1 == b2 ? y1 : y2;
3976 it is better for b1 and b2 to use the mask type associated
3977 with int elements rather than bool (byte) elements. */
3978 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3979 if (!rhs1_type)
3980 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3982 else
3983 return NULL;
3985 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3987 if (!vectype1 || !vectype2)
3988 return NULL;
3990 /* Continue if a conversion is needed. Also continue if we have
3991 a comparison whose vector type would normally be different from
3992 VECTYPE2 when considered in isolation. In that case we'll
3993 replace the comparison with an SSA name (so that we can record
3994 its vector type) and behave as though the comparison was an SSA
3995 name from the outset. */
3996 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3997 TYPE_VECTOR_SUBPARTS (vectype2))
3998 && (TREE_CODE (rhs1) == SSA_NAME
3999 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4000 return NULL;
4002 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4003 in place, we can handle it in vectorizable_condition. This avoids
4004 unnecessary promotion stmts and increased vectorization factor. */
4005 if (COMPARISON_CLASS_P (rhs1)
4006 && INTEGRAL_TYPE_P (rhs1_type)
4007 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4008 TYPE_VECTOR_SUBPARTS (vectype2)))
4010 enum vect_def_type dt;
4011 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4012 && dt == vect_external_def
4013 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4014 && (dt == vect_external_def
4015 || dt == vect_constant_def))
4017 tree wide_scalar_type = build_nonstandard_integer_type
4018 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4019 TYPE_UNSIGNED (rhs1_type));
4020 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4021 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4022 return NULL;
4026 /* If rhs1 is a comparison we need to move it into a
4027 separate statement. */
4028 if (TREE_CODE (rhs1) != SSA_NAME)
4030 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4031 pattern_stmt = gimple_build_assign (tmp, rhs1);
4032 rhs1 = tmp;
4033 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4036 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4037 TYPE_VECTOR_SUBPARTS (vectype2)))
4038 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4039 else
4040 tmp = rhs1;
4042 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4043 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4044 gimple_assign_rhs2 (last_stmt),
4045 gimple_assign_rhs3 (last_stmt));
4047 *type_out = vectype1;
4048 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4050 return pattern_stmt;
4053 /* Now check for binary boolean operations requiring conversion for
4054 one of the operands. */
4055 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4056 return NULL;
4058 if (rhs_code != BIT_IOR_EXPR
4059 && rhs_code != BIT_XOR_EXPR
4060 && rhs_code != BIT_AND_EXPR
4061 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4062 return NULL;
4064 rhs2 = gimple_assign_rhs2 (last_stmt);
4066 rhs1_type = search_type_for_mask (rhs1, vinfo);
4067 rhs2_type = search_type_for_mask (rhs2, vinfo);
4069 if (!rhs1_type || !rhs2_type
4070 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4071 return NULL;
4073 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4075 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4076 if (!vectype1)
4077 return NULL;
4078 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4080 else
4082 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4083 if (!vectype1)
4084 return NULL;
4085 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4088 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4089 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4091 *type_out = vectype1;
4092 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4094 return pattern_stmt;
4097 /* STMT_INFO is a load or store. If the load or store is conditional, return
4098 the boolean condition under which it occurs, otherwise return null. */
4100 static tree
4101 vect_get_load_store_mask (stmt_vec_info stmt_info)
4103 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4105 gcc_assert (gimple_assign_single_p (def_assign));
4106 return NULL_TREE;
4109 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4111 internal_fn ifn = gimple_call_internal_fn (def_call);
4112 int mask_index = internal_fn_mask_index (ifn);
4113 return gimple_call_arg (def_call, mask_index);
4116 gcc_unreachable ();
4119 /* Return the scalar offset type that an internal gather/scatter function
4120 should use. GS_INFO describes the gather/scatter operation. */
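/* For example: with 64-bit elements and a 32-bit signed offset, the type
   returned below is a 64-bit signed integer type; an offset that already
   has the element width is returned unchanged.  */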
4122 static tree
4123 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4125 tree offset_type = TREE_TYPE (gs_info->offset);
4126 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4128 /* Enforced by vect_check_gather_scatter. */
4129 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4130 gcc_assert (element_bits >= offset_bits);
4132 /* If the offset is narrower than the elements, extend it according
4133 to its sign. */
4134 if (element_bits > offset_bits)
4135 return build_nonstandard_integer_type (element_bits,
4136 TYPE_UNSIGNED (offset_type));
4138 return offset_type;
4141 /* Return MASK if MASK is suitable for masking an operation on vectors
4142 of type VECTYPE, otherwise convert it into such a form and return
4143 the result. Associate any conversion statements with STMT_INFO's
4144 pattern. */
4146 static tree
4147 vect_convert_mask_for_vectype (tree mask, tree vectype,
4148 stmt_vec_info stmt_info, vec_info *vinfo)
4150 tree mask_type = search_type_for_mask (mask, vinfo);
4151 if (mask_type)
4153 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4154 if (mask_vectype
4155 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4156 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4157 mask = build_mask_conversion (mask, vectype, stmt_info);
4159 return mask;
4162 /* Return the equivalent of:
4164 fold_convert (TYPE, VALUE)
4166 with the expectation that the operation will be vectorized.
4167 If new statements are needed, add them as pattern statements
4168 to STMT_INFO. */
4170 static tree
4171 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4173 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4174 return value;
4176 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4177 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4178 append_pattern_def_seq (stmt_info, conversion,
4179 get_vectype_for_scalar_type (type));
4180 return new_value;
4183 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4184 internal function. Return the final statement on success and set
4185 *TYPE_OUT to the vector type being loaded or stored.
4187 This function only handles gathers and scatters that were recognized
4188 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
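/* An illustrative (made-up) source form this typically applies to:

     for (i = 0; i < n; i++)
       out[i] = a[idx[i]];        /* gather load */
     for (i = 0; i < n; i++)
       a[idx[i]] = in[i];         /* scatter store */

   On success the access is rewritten as a call to the appropriate
   internal function, with the mask appended as a trailing argument when
   the access is conditional.  */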
4190 static gimple *
4191 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4193 /* Currently we only support this for loop vectorization. */
4194 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4195 if (!loop_vinfo)
4196 return NULL;
4198 /* Make sure that we're looking at a gather load or scatter store. */
4199 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4200 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4201 return NULL;
4203 /* Get the boolean that controls whether the load or store happens.
4204 This is null if the operation is unconditional. */
4205 tree mask = vect_get_load_store_mask (stmt_info);
4207 /* Make sure that the target supports an appropriate internal
4208 function for the gather/scatter operation. */
4209 gather_scatter_info gs_info;
4210 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4211 || gs_info.decl)
4212 return NULL;
4214 /* Convert the mask to the right form. */
4215 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4216 if (mask)
4217 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4218 loop_vinfo);
4220 /* Get the invariant base and non-invariant offset, converting the
4221 latter to the same width as the vector elements. */
4222 tree base = gs_info.base;
4223 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4224 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4225 stmt_info);
4227 /* Build the new pattern statement. */
4228 tree scale = size_int (gs_info.scale);
4229 gcall *pattern_stmt;
4230 if (DR_IS_READ (dr))
4232 if (mask != NULL)
4233 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4234 offset, scale, mask);
4235 else
4236 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4237 offset, scale);
4238 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4239 gimple_call_set_lhs (pattern_stmt, load_lhs);
4241 else
4243 tree rhs = vect_get_store_rhs (stmt_info);
4244 if (mask != NULL)
4245 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4246 base, offset, scale, rhs,
4247 mask);
4248 else
4249 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4250 base, offset, scale, rhs);
4252 gimple_call_set_nothrow (pattern_stmt, true);
4254 /* Copy across relevant vectorization info and associate DR with the
4255 new pattern statement instead of the original statement. */
4256 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4257 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4259 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4260 *type_out = vectype;
4261 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4263 return pattern_stmt;
4266 /* Return true if TYPE is a non-boolean integer type. These are the types
4267 that we want to consider for narrowing. */
4269 static bool
4270 vect_narrowable_type_p (tree type)
4272 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4275 /* Return true if the operation given by CODE can be truncated to N bits
4276 when only N bits of the output are needed. This is only true if bit N+1
4277 of the inputs has no effect on the low N bits of the result. */
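/* For instance, addition qualifies: the low N bits of a + b depend only
   on the low N bits of a and b.  Division does not: 0x100 / 2 and
   0x0 / 2 differ in their low 8 bits even though both inputs agree in
   their low 8 bits, which is why TRUNC_DIV_EXPR is absent from the list
   below.  */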
4279 static bool
4280 vect_truncatable_operation_p (tree_code code)
4282 switch (code)
4284 case PLUS_EXPR:
4285 case MINUS_EXPR:
4286 case MULT_EXPR:
4287 case BIT_AND_EXPR:
4288 case BIT_IOR_EXPR:
4289 case BIT_XOR_EXPR:
4290 case COND_EXPR:
4291 return true;
4293 default:
4294 return false;
4298 /* Record that STMT_INFO could be changed from operating on TYPE to
4299 operating on a type with the precision and sign given by PRECISION
4300 and SIGN respectively. PRECISION is an arbitrary bit precision;
4301 it might not be a whole number of bytes. */
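/* Only narrowing is recorded: the precision below is stored only when it
   is smaller than TYPE's precision and smaller than any previously
   recorded operation precision, so the tightest request wins.  */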
4303 static void
4304 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4305 unsigned int precision, signop sign)
4307 /* Round the precision up to a whole number of bytes. */
4308 precision = vect_element_precision (precision);
4309 if (precision < TYPE_PRECISION (type)
4310 && (!stmt_info->operation_precision
4311 || stmt_info->operation_precision > precision))
4313 stmt_info->operation_precision = precision;
4314 stmt_info->operation_sign = sign;
4318 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4319 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4320 is an arbitrary bit precision; it might not be a whole number of bytes. */
4322 static void
4323 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4324 unsigned int min_input_precision)
4326 /* This operation in isolation only requires the inputs to have
4327 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4328 that MIN_INPUT_PRECISION is a natural precision for the chain
4329 as a whole. E.g. consider something like:
4331 unsigned short *x, *y;
4332 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4334 The right shift can be done on unsigned chars, and only requires the
4335 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4336 approach would mean turning a natural chain of single-vector unsigned
4337 short operations into one that truncates "*x" and then extends
4338 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4339 operation and one vector for each unsigned char operation.
4340 This would be a significant pessimization.
4342 Instead only propagate the maximum of this precision and the precision
4343 required by the users of the result. This means that we don't pessimize
4344 the case above but continue to optimize things like:
4346 unsigned char *y;
4347 unsigned short *x;
4348 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4350 Here we would truncate two vectors of *x to a single vector of
4351 unsigned chars and use single-vector unsigned char operations for
4352 everything else, rather than doing two unsigned short copies of
4353 "(*x & 0xf0) >> 4" and then truncating the result. */
4354 min_input_precision = MAX (min_input_precision,
4355 stmt_info->min_output_precision);
4357 if (min_input_precision < TYPE_PRECISION (type)
4358 && (!stmt_info->min_input_precision
4359 || stmt_info->min_input_precision > min_input_precision))
4360 stmt_info->min_input_precision = min_input_precision;
4363 /* Subroutine of vect_determine_min_output_precision. Return true if
4364 we can calculate a reduced number of output bits for STMT_INFO,
4365 whose result is LHS. */
4367 static bool
4368 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4370 /* Take the maximum precision required by users of the result. */
4371 vec_info *vinfo = stmt_info->vinfo;
4372 unsigned int precision = 0;
4373 imm_use_iterator iter;
4374 use_operand_p use;
4375 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4377 gimple *use_stmt = USE_STMT (use);
4378 if (is_gimple_debug (use_stmt))
4379 continue;
4380 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4381 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4382 return false;
4383 /* The input precision recorded for COND_EXPRs applies only to the
4384 "then" and "else" values. */
4385 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4386 if (assign
4387 && gimple_assign_rhs_code (assign) == COND_EXPR
4388 && use->use != gimple_assign_rhs2_ptr (assign)
4389 && use->use != gimple_assign_rhs3_ptr (assign))
4390 return false;
4391 precision = MAX (precision, use_stmt_info->min_input_precision);
4394 if (dump_enabled_p ())
4395 dump_printf_loc (MSG_NOTE, vect_location,
4396 "only the low %d bits of %T are significant\n",
4397 precision, lhs);
4398 stmt_info->min_output_precision = precision;
4399 return true;
4402 /* Calculate min_output_precision for STMT_INFO. */
4404 static void
4405 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4407 /* We're only interested in statements with a narrowable result. */
4408 tree lhs = gimple_get_lhs (stmt_info->stmt);
4409 if (!lhs
4410 || TREE_CODE (lhs) != SSA_NAME
4411 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4412 return;
4414 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4415 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4418 /* Use range information to decide whether STMT (described by STMT_INFO)
4419 could be done in a narrower type. This is effectively a forward
4420 propagation, since it uses context-independent information that applies
4421 to all users of an SSA name. */
4423 static void
4424 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4426 tree lhs = gimple_assign_lhs (stmt);
4427 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4428 return;
4430 tree type = TREE_TYPE (lhs);
4431 if (!vect_narrowable_type_p (type))
4432 return;
4434 /* First see whether we have any useful range information for the result. */
4435 unsigned int precision = TYPE_PRECISION (type);
4436 signop sign = TYPE_SIGN (type);
4437 wide_int min_value, max_value;
4438 if (!vect_get_range_info (lhs, &min_value, &max_value))
4439 return;
4441 tree_code code = gimple_assign_rhs_code (stmt);
4442 unsigned int nops = gimple_num_ops (stmt);
4444 if (!vect_truncatable_operation_p (code))
4445 /* Check that all relevant input operands are compatible, and update
4446 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4447 for (unsigned int i = 1; i < nops; ++i)
4449 tree op = gimple_op (stmt, i);
4450 if (TREE_CODE (op) == INTEGER_CST)
4452 /* Don't require the integer to have RHS_TYPE (which it might
4453 not for things like shift amounts, etc.), but do require it
4454 to fit the type. */
4455 if (!int_fits_type_p (op, type))
4456 return;
4458 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4459 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4461 else if (TREE_CODE (op) == SSA_NAME)
4463 /* Ignore codes that don't take uniform arguments. */
4464 if (!types_compatible_p (TREE_TYPE (op), type))
4465 return;
4467 wide_int op_min_value, op_max_value;
4468 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4469 return;
4471 min_value = wi::min (min_value, op_min_value, sign);
4472 max_value = wi::max (max_value, op_max_value, sign);
4474 else
4475 return;
4478 /* Try to switch signed types for unsigned types if we can.
4479 This is better for two reasons. First, unsigned ops tend
4480 to be cheaper than signed ops. Second, it means that we can
4481 handle things like:
4483 signed char c;
4484 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4488 signed char c;
4489 unsigned short res_1 = (unsigned short) c & 0xff00;
4490 int res = (int) res_1;
4492 where the intermediate result res_1 has unsigned rather than
4493 signed type. */
4494 if (sign == SIGNED && !wi::neg_p (min_value))
4495 sign = UNSIGNED;
4497 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4498 unsigned int precision1 = wi::min_precision (min_value, sign);
4499 unsigned int precision2 = wi::min_precision (max_value, sign);
4500 unsigned int value_precision = MAX (precision1, precision2);
4501 if (value_precision >= precision)
4502 return;
4504 if (dump_enabled_p ())
4505 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4506 " without loss of precision: %G",
4507 sign == SIGNED ? "signed" : "unsigned",
4508 value_precision, stmt);
4510 vect_set_operation_type (stmt_info, type, value_precision, sign);
4511 vect_set_min_input_precision (stmt_info, type, value_precision);
4514 /* Use information about the users of STMT's result to decide whether
4515 STMT (described by STMT_INFO) could be done in a narrower type.
4516 This is effectively a backward propagation. */
4518 static void
4519 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4521 tree_code code = gimple_assign_rhs_code (stmt);
4522 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4523 tree type = TREE_TYPE (gimple_op (stmt, opno));
4524 if (!vect_narrowable_type_p (type))
4525 return;
4527 unsigned int precision = TYPE_PRECISION (type);
4528 unsigned int operation_precision, min_input_precision;
4529 switch (code)
4531 CASE_CONVERT:
4532 /* Only the bits that contribute to the output matter. Don't change
4533 the precision of the operation itself. */
4534 operation_precision = precision;
4535 min_input_precision = stmt_info->min_output_precision;
4536 break;
4538 case LSHIFT_EXPR:
4539 case RSHIFT_EXPR:
4541 tree shift = gimple_assign_rhs2 (stmt);
4542 if (TREE_CODE (shift) != INTEGER_CST
4543 || !wi::ltu_p (wi::to_widest (shift), precision))
4544 return;
4545 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4546 if (code == LSHIFT_EXPR)
4548 /* We need CONST_SHIFT fewer bits of the input. */
4549 operation_precision = stmt_info->min_output_precision;
4550 min_input_precision = (MAX (operation_precision, const_shift)
4551 - const_shift);
4553 else
4555 /* We need CONST_SHIFT extra bits to do the operation. */
4556 operation_precision = (stmt_info->min_output_precision
4557 + const_shift);
4558 min_input_precision = operation_precision;
4560 break;
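      /* Worked example (illustrative): if only the low 8 bits of the result
         are needed, then x >> 3 must be computed in at least 8 + 3 = 11 bits
         and needs 11 bits of input, whereas x << 3 can be computed in 8 bits
         using MAX (8, 3) - 3 = 5 bits of input.  */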
4563 default:
4564 if (vect_truncatable_operation_p (code))
4566 /* Input bit N has no effect on output bits N-1 and lower. */
4567 operation_precision = stmt_info->min_output_precision;
4568 min_input_precision = operation_precision;
4569 break;
4571 return;
4574 if (operation_precision < precision)
4576 if (dump_enabled_p ())
4577 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4578 " without affecting users: %G",
4579 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4580 operation_precision, stmt);
4581 vect_set_operation_type (stmt_info, type, operation_precision,
4582 TYPE_SIGN (type));
4584 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4587 /* Handle vect_determine_precisions for STMT_INFO, given that we
4588 have already done so for the users of its result. */
4590 void
4591 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4593 vect_determine_min_output_precision (stmt_info);
4594 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4596 vect_determine_precisions_from_range (stmt_info, stmt);
4597 vect_determine_precisions_from_users (stmt_info, stmt);
4601 /* Walk backwards through the vectorizable region to determine the
4602 values of these fields:
4604 - min_output_precision
4605 - min_input_precision
4606 - operation_precision
4607 - operation_sign. */
4609 void
4610 vect_determine_precisions (vec_info *vinfo)
4612 DUMP_VECT_SCOPE ("vect_determine_precisions");
4614 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4616 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4617 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4618 unsigned int nbbs = loop->num_nodes;
4620 for (unsigned int i = 0; i < nbbs; i++)
4622 basic_block bb = bbs[nbbs - i - 1];
4623 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4624 !gsi_end_p (si); gsi_prev (&si))
4625 vect_determine_stmt_precisions
4626 (vinfo->lookup_stmt (gsi_stmt (si)));
4629 else
4631 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4632 gimple_stmt_iterator si = bb_vinfo->region_end;
4633 gimple *stmt;
4634 do
4636 if (!gsi_stmt (si))
4637 si = gsi_last_bb (bb_vinfo->bb);
4638 else
4639 gsi_prev (&si);
4640 stmt = gsi_stmt (si);
4641 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4642 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4643 vect_determine_stmt_precisions (stmt_info);
4645 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4649 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4651 struct vect_recog_func
4653 vect_recog_func_ptr fn;
4654 const char *name;
4657 /* Note that ordering matters - the first pattern matching on a stmt is
4658 taken, which means the more complex pattern usually needs to precede the
4659 less complex ones (widen_sum only after dot_prod or sad, for example). */
4660 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4661 { vect_recog_over_widening_pattern, "over_widening" },
4662 /* Must come after over_widening, which narrows the shift as much as
4663 possible beforehand. */
4664 { vect_recog_average_pattern, "average" },
4665 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4666 { vect_recog_widen_mult_pattern, "widen_mult" },
4667 { vect_recog_dot_prod_pattern, "dot_prod" },
4668 { vect_recog_sad_pattern, "sad" },
4669 { vect_recog_widen_sum_pattern, "widen_sum" },
4670 { vect_recog_pow_pattern, "pow" },
4671 { vect_recog_widen_shift_pattern, "widen_shift" },
4672 { vect_recog_rotate_pattern, "rotate" },
4673 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4674 { vect_recog_divmod_pattern, "divmod" },
4675 { vect_recog_mult_pattern, "mult" },
4676 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4677 { vect_recog_bool_pattern, "bool" },
4678 /* This must come before mask conversion, and includes the parts
4679 of mask conversion that are needed for gather and scatter
4680 internal functions. */
4681 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4682 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4685 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4687 /* Mark statements that are involved in a pattern. */
4689 static inline void
4690 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4691 tree pattern_vectype)
4693 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4695 gimple *orig_pattern_stmt = NULL;
4696 if (is_pattern_stmt_p (orig_stmt_info))
4698 /* We're replacing a statement in an existing pattern definition
4699 sequence. */
4700 orig_pattern_stmt = orig_stmt_info->stmt;
4701 if (dump_enabled_p ())
4702 dump_printf_loc (MSG_NOTE, vect_location,
4703 "replacing earlier pattern %G", orig_pattern_stmt);
4705 /* To keep the book-keeping simple, just swap the lhs of the
4706 old and new statements, so that the old one has a valid but
4707 unused lhs. */
4708 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4709 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4710 gimple_set_lhs (pattern_stmt, old_lhs);
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
4715 /* Switch to the statement that ORIG replaces. */
4716 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4718 /* We shouldn't be replacing the main pattern statement. */
4719 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4720 != orig_pattern_stmt);
4723 if (def_seq)
4724 for (gimple_stmt_iterator si = gsi_start (def_seq);
4725 !gsi_end_p (si); gsi_next (&si))
4726 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4728 if (orig_pattern_stmt)
4730 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4732 /* Insert all the new pattern statements before the original one. */
4733 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4734 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4735 orig_def_seq);
4736 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4737 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4739 /* Remove the pattern statement that this new pattern replaces. */
4740 gsi_remove (&gsi, false);
4742 else
4743 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4746 /* Function vect_pattern_recog_1
4748 Input:
4749 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4750 computation pattern.
4751 STMT_INFO: A stmt from which the pattern search should start.
4753 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4754 a sequence of statements that has the same functionality and can be
4755 used to replace STMT_INFO. It returns the last statement in the sequence
4756 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4757 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4758 statement, having first checked that the target supports the new operation
4759 in that type.
4761 This function also does some bookkeeping, as explained in the documentation
4762 for vect_pattern_recog. */
4764 static void
4765 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4767 vec_info *vinfo = stmt_info->vinfo;
4768 gimple *pattern_stmt;
4769 loop_vec_info loop_vinfo;
4770 tree pattern_vectype;
4772 /* If this statement has already been replaced with pattern statements,
4773 leave the original statement alone, since the first match wins.
4774 Instead try to match against the definition statements that feed
4775 the main pattern statement. */
4776 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4778 gimple_stmt_iterator gsi;
4779 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4780 !gsi_end_p (gsi); gsi_next (&gsi))
4781 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4782 return;
4785 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4786 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4787 if (!pattern_stmt)
4789 /* Clear any half-formed pattern definition sequence. */
4790 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4791 return;
4794 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4795 gcc_assert (pattern_vectype);
4797 /* Found a vectorizable pattern. */
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_NOTE, vect_location,
4800 "%s pattern recognized: %G",
4801 recog_func->name, pattern_stmt);
4803 /* Mark the stmts that are involved in the pattern. */
4804 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4806 /* Patterns cannot be vectorized using SLP, because they change the order of
4807 computation. */
4808 if (loop_vinfo)
4810 unsigned ix, ix2;
4811 stmt_vec_info *elem_ptr;
4812 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4813 elem_ptr, *elem_ptr == stmt_info);
4818 /* Function vect_pattern_recog
4820 Input:
4821 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4822 computation idioms.
4824 Output - for each computation idiom that is detected we create a new stmt
4825 that provides the same functionality and that can be vectorized. We
4826 also record some information in the struct_stmt_info of the relevant
4827 stmts, as explained below:
4829 At the entry to this function we have the following stmts, with the
4830 following initial value in the STMT_VINFO fields:
4832 stmt in_pattern_p related_stmt vec_stmt
4833 S1: a_i = .... - - -
4834 S2: a_2 = ..use(a_i).. - - -
4835 S3: a_1 = ..use(a_2).. - - -
4836 S4: a_0 = ..use(a_1).. - - -
4837 S5: ... = ..use(a_0).. - - -
4839 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4840 represented by a single stmt. We then:
4841 - create a new stmt S6 equivalent to the pattern (the stmt is not
4842 inserted into the code)
4843 - fill in the STMT_VINFO fields as follows:
4845 in_pattern_p related_stmt vec_stmt
4846 S1: a_i = .... - - -
4847 S2: a_2 = ..use(a_i).. - - -
4848 S3: a_1 = ..use(a_2).. - - -
4849 S4: a_0 = ..use(a_1).. true S6 -
4850 '---> S6: a_new = .... - S4 -
4851 S5: ... = ..use(a_0).. - - -
4853 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4854 to each other through the RELATED_STMT field).
4856 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4857 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4858 remain irrelevant unless used by stmts other than S4.
4860 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4861 (because they are marked as irrelevant). It will vectorize S6, and record
4862 a pointer to the new vector stmt VS6 from S6 (as usual).
4863 S4 will be skipped, and S5 will be vectorized as usual:
4865 in_pattern_p related_stmt vec_stmt
4866 S1: a_i = .... - - -
4867 S2: a_2 = ..use(a_i).. - - -
4868 S3: a_1 = ..use(a_2).. - - -
4869 > VS6: va_new = .... - - -
4870 S4: a_0 = ..use(a_1).. true S6 VS6
4871 '---> S6: a_new = .... - S4 VS6
4872 > VS5: ... = ..vuse(va_new).. - - -
4873 S5: ... = ..use(a_0).. - - -
4875 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4876 elsewhere), and we'll end up with:
4878 VS6: va_new = ....
4879 VS5: ... = ..vuse(va_new)..
4881 In case of more than one pattern statement, e.g., widen-mult with
4882 intermediate type:
4884 S1 a_t = ;
4885 S2 a_T = (TYPE) a_t;
4886 '--> S3: a_it = (interm_type) a_t;
4887 S4 prod_T = a_T * CONST;
4888 '--> S5: prod_T' = a_it w* CONST;
4890 there may be other users of a_T outside the pattern. In that case S2 will
4891 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4892 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4893 be recorded in S3. */
4895 void
4896 vect_pattern_recog (vec_info *vinfo)
4898 struct loop *loop;
4899 basic_block *bbs;
4900 unsigned int nbbs;
4901 gimple_stmt_iterator si;
4902 unsigned int i, j;
4904 vect_determine_precisions (vinfo);
4906 DUMP_VECT_SCOPE ("vect_pattern_recog");
4908 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4910 loop = LOOP_VINFO_LOOP (loop_vinfo);
4911 bbs = LOOP_VINFO_BBS (loop_vinfo);
4912 nbbs = loop->num_nodes;
4914 /* Scan through the loop stmts, applying the pattern recognition
4915 functions starting at each stmt visited: */
4916 for (i = 0; i < nbbs; i++)
4918 basic_block bb = bbs[i];
4919 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4921 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4922 /* Scan over all generic vect_recog_xxx_pattern functions. */
4923 for (j = 0; j < NUM_PATTERNS; j++)
4924 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4925 stmt_info);
4929 else
4931 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4932 for (si = bb_vinfo->region_begin;
4933 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4935 gimple *stmt = gsi_stmt (si);
4936 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4937 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4938 continue;
4940 /* Scan over all generic vect_recog_xxx_pattern functions. */
4941 for (j = 0; j < NUM_PATTERNS; j++)
4942 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);