[35/46] Alter interfaces within vect_pattern_recog
[official-gcc.git] / gcc / tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
56 value_range_type vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
71 return true;
73 else
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
80 return false;
84 /* Report that we've found an instance of pattern PATTERN in
85 statement STMT. */
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
93 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
98 return the pattern statement's stmt_vec_info. Set its vector type to
99 VECTYPE if it doesn't have one already. */
101 static stmt_vec_info
102 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
103 tree vectype)
105 vec_info *vinfo = orig_stmt_info->vinfo;
106 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
107 if (pattern_stmt_info == NULL_STMT_VEC_INFO)
108 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
109 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
111 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
112 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
113 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
114 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
115 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
116 return pattern_stmt_info;
119 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
120 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
121 have one already. */
123 static void
124 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
125 tree vectype)
127 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
128 STMT_VINFO_RELATED_STMT (orig_stmt_info)
129 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
132 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
133 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
134 be different from the vector type of the final pattern statement. */
136 static inline void
137 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
138 tree vectype = NULL_TREE)
140 vec_info *vinfo = stmt_info->vinfo;
141 if (vectype)
143 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
144 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
146 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
147 new_stmt);
150 /* The caller wants to perform new operations on vect_external variable
151 VAR, so that the result of the operations would also be vect_external.
152 Return the edge on which the operations can be performed, if one exists.
153 Return null if the operations should instead be treated as part of
154 the pattern that needs them. */
156 static edge
157 vect_get_external_def_edge (vec_info *vinfo, tree var)
159 edge e = NULL;
160 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
162 e = loop_preheader_edge (loop_vinfo->loop);
163 if (!SSA_NAME_IS_DEFAULT_DEF (var))
165 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
166 if (bb == NULL
167 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
168 e = NULL;
171 return e;
174 /* Return true if the target supports a vector version of CODE,
175 where CODE is known to map to a direct optab. ITYPE specifies
176 the type of (some of) the scalar inputs and OTYPE specifies the
177 type of the scalar result.
179 If CODE allows the inputs and outputs to have different types
180 (such as for WIDEN_SUM_EXPR), it is the input mode rather
181 than the output mode that determines the appropriate target pattern.
182 Operand 0 of the target pattern then specifies the mode that the output
183 must have.
185 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
186 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
187 is nonnull. */
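/* Illustrative example (not from the original comment): for WIDEN_SUM_EXPR
   with ITYPE "short int" and OTYPE "int", the optab is looked up via the
   vector mode of the short inputs, and the check only succeeds if operand 0
   of the matching instruction has the vector mode of the int result.  */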
189 static bool
190 vect_supportable_direct_optab_p (tree otype, tree_code code,
191 tree itype, tree *vecotype_out,
192 tree *vecitype_out = NULL)
194 tree vecitype = get_vectype_for_scalar_type (itype);
195 if (!vecitype)
196 return false;
198 tree vecotype = get_vectype_for_scalar_type (otype);
199 if (!vecotype)
200 return false;
202 optab optab = optab_for_tree_code (code, vecitype, optab_default);
203 if (!optab)
204 return false;
206 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
207 if (icode == CODE_FOR_nothing
208 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
209 return false;
211 *vecotype_out = vecotype;
212 if (vecitype_out)
213 *vecitype_out = vecitype;
214 return true;
217 /* Round bit precision PRECISION up to a full element. */
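/* For example (illustrative values, assuming 8-bit units): a precision of 5
   rounds up to 8 and a precision of 17 rounds up to 32, so the result is
   always a power of two of at least BITS_PER_UNIT.  */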
219 static unsigned int
220 vect_element_precision (unsigned int precision)
222 precision = 1 << ceil_log2 (precision);
223 return MAX (precision, BITS_PER_UNIT);
226 /* If OP is defined by a statement that's being considered for vectorization,
227 return information about that statement, otherwise return NULL. */
229 static stmt_vec_info
230 vect_get_internal_def (vec_info *vinfo, tree op)
232 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
233 if (def_stmt_info
234 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
235 return def_stmt_info;
236 return NULL;
239 /* Check whether NAME, an ssa-name used in STMT_VINFO,
240 is a result of a type promotion, such that:
241 DEF_STMT: NAME = NOP (name0)
242 If CHECK_SIGN is TRUE, check that either both types are signed or both are
243 unsigned. */
245 static bool
246 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
247 tree *orig_type, gimple **def_stmt, bool *promotion)
249 tree type = TREE_TYPE (name);
250 tree oprnd0;
251 enum vect_def_type dt;
253 stmt_vec_info def_stmt_info;
254 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
255 def_stmt))
256 return false;
258 if (dt != vect_internal_def
259 && dt != vect_external_def && dt != vect_constant_def)
260 return false;
262 if (!*def_stmt)
263 return false;
265 if (!is_gimple_assign (*def_stmt))
266 return false;
268 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
269 return false;
271 oprnd0 = gimple_assign_rhs1 (*def_stmt);
273 *orig_type = TREE_TYPE (oprnd0);
274 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
275 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
276 return false;
278 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
279 *promotion = true;
280 else
281 *promotion = false;
283 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
284 return false;
286 return true;
289 /* Holds information about an input operand after some sign changes
290 and type promotions have been peeled away. */
291 struct vect_unpromoted_value {
292 vect_unpromoted_value ();
294 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
296 /* The value obtained after peeling away zero or more casts. */
297 tree op;
299 /* The type of OP. */
300 tree type;
302 /* The definition type of OP. */
303 vect_def_type dt;
305 /* If OP is the result of peeling at least one cast, and if the cast
306 of OP itself is a vectorizable statement, CASTER identifies that
307 statement, otherwise it is null. */
308 stmt_vec_info caster;
311 inline vect_unpromoted_value::vect_unpromoted_value ()
312 : op (NULL_TREE),
313 type (NULL_TREE),
314 dt (vect_uninitialized_def),
315 caster (NULL)
319 /* Set the operand to OP_IN, its definition type to DT_IN, and the
320 statement that casts it to CASTER_IN. */
322 inline void
323 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
324 stmt_vec_info caster_in)
326 op = op_in;
327 type = TREE_TYPE (op);
328 dt = dt_in;
329 caster = caster_in;
332 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
333 to reach some vectorizable inner operand OP', continuing as long as it
334 is possible to convert OP' back to OP using a possible sign change
335 followed by a possible promotion P. Return this OP', or null if OP is
336 not a vectorizable SSA name. If there is a promotion P, describe its
337 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
338 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
339 have more than one user.
341 A successful return means that it is possible to go from OP' to OP
342 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
343 whereas the cast from UNPROM to OP might be a promotion, a sign
344 change, or a nop.
346 E.g. say we have:
348 signed short *ptr = ...;
349 signed short C = *ptr;
350 unsigned short B = (unsigned short) C; // sign change
351 signed int A = (signed int) B; // unsigned promotion
352 ...possible other uses of A...
353 unsigned int OP = (unsigned int) A; // sign change
355 In this case it's possible to go directly from C to OP using:
357 OP = (unsigned int) (unsigned short) C;
358 +------------+ +--------------+
359 promotion sign change
361 so OP' would be C. The input to the promotion is B, so UNPROM
362 would describe B. */
364 static tree
365 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
366 vect_unpromoted_value *unprom,
367 bool *single_use_p = NULL)
369 tree res = NULL_TREE;
370 tree op_type = TREE_TYPE (op);
371 unsigned int orig_precision = TYPE_PRECISION (op_type);
372 stmt_vec_info caster = NULL;
373 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
375 /* See whether OP is simple enough to vectorize. */
376 stmt_vec_info def_stmt_info;
377 gimple *def_stmt;
378 vect_def_type dt;
379 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
380 break;
382 /* If OP is the input of a demotion, skip over it to see whether
383 OP is itself the result of a promotion. If so, the combined
384 effect of the promotion and the demotion might fit the required
385 pattern, otherwise neither operation fits.
387 This copes with cases such as the result of an arithmetic
388 operation being truncated before being stored, and where that
389 arithmetic operation has been recognized as an over-widened one. */
390 if (TYPE_PRECISION (op_type) <= orig_precision)
392 /* Use OP as the UNPROM described above if we haven't yet
393 found a promotion, or if using the new input preserves the
394 sign of the previous promotion. */
395 if (!res
396 || TYPE_PRECISION (unprom->type) == orig_precision
397 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
398 unprom->set_op (op, dt, caster);
399 /* Stop if we've already seen a promotion and if this
400 conversion does more than change the sign. */
401 else if (TYPE_PRECISION (op_type)
402 != TYPE_PRECISION (unprom->type))
403 break;
405 /* The sequence now extends to OP. */
406 res = op;
409 /* See whether OP is defined by a cast. Record it as CASTER if
410 the cast is potentially vectorizable. */
411 if (!def_stmt)
412 break;
413 caster = def_stmt_info;
415 /* Ignore pattern statements, since we don't link uses for them. */
416 if (caster
417 && single_use_p
418 && !STMT_VINFO_RELATED_STMT (caster)
419 && !has_single_use (res))
420 *single_use_p = false;
422 gassign *assign = dyn_cast <gassign *> (def_stmt);
423 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
424 break;
426 /* Continue with the input to the cast. */
427 op = gimple_assign_rhs1 (def_stmt);
428 op_type = TREE_TYPE (op);
430 return res;
433 /* OP is an integer operand to an operation that returns TYPE, and we
434 want to treat the operation as a widening one. So far we can treat
435 it as widening from *COMMON_TYPE.
437 Return true if OP is suitable for such a widening operation,
438 either widening from *COMMON_TYPE or from some supertype of it.
439 Update *COMMON_TYPE to the supertype in the latter case.
441 SHIFT_P is true if OP is a shift amount. */
443 static bool
444 vect_joust_widened_integer (tree type, bool shift_p, tree op,
445 tree *common_type)
447 /* Calculate the minimum precision required by OP, without changing
448 the sign of either operand. */
449 unsigned int precision;
450 if (shift_p)
452 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
453 return false;
454 precision = TREE_INT_CST_LOW (op);
456 else
458 precision = wi::min_precision (wi::to_widest (op),
459 TYPE_SIGN (*common_type));
460 if (precision * 2 > TYPE_PRECISION (type))
461 return false;
464 /* If OP requires a wider type, switch to that type. The checks
465 above ensure that this is still narrower than the result. */
466 precision = vect_element_precision (precision);
467 if (TYPE_PRECISION (*common_type) < precision)
468 *common_type = build_nonstandard_integer_type
469 (precision, TYPE_UNSIGNED (*common_type));
470 return true;
473 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
474 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
476 static bool
477 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
479 if (types_compatible_p (*common_type, new_type))
480 return true;
482 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
483 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
484 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
485 return true;
487 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
488 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
489 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
491 *common_type = new_type;
492 return true;
495 /* We have mismatched signs, with the signed type being
496 no wider than the unsigned type. In this case we need
497 a wider signed type. */
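/* For instance (illustrative): combining an unsigned char operand with a
   signed char operand requires a signed 16-bit common type, which is only
   acceptable when the result type is at least 32 bits wide.  */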
498 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
499 TYPE_PRECISION (new_type));
500 precision *= 2;
501 if (precision * 2 > TYPE_PRECISION (type))
502 return false;
504 *common_type = build_nonstandard_integer_type (precision, false);
505 return true;
508 /* Check whether STMT_INFO can be viewed as a tree of integer operations
509 in which each node either performs CODE or WIDENED_CODE, and where
510 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
511 specifies the maximum number of leaf operands. SHIFT_P says whether
512 CODE and WIDENED_CODE are some sort of shift.
514 If STMT_INFO is such a tree, return the number of leaf operands
515 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
516 to a type that (a) is narrower than the result of STMT_INFO and
517 (b) can hold all leaf operand values.
519 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
520 exists. */
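/* An illustrative example of such a tree (not from the original comment):

     int s1 = (int) a + (int) b;
     int s2 = s1 + (int) c;

   where a, b and c are all narrower than int.  The tree rooted at s2 has
   the three leaf operands a, b and c, and *COMMON_TYPE must be able to
   hold all of their values.  */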
522 static unsigned int
523 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
524 tree_code widened_code, bool shift_p,
525 unsigned int max_nops,
526 vect_unpromoted_value *unprom, tree *common_type)
528 /* Check for an integer operation with the right code. */
529 vec_info *vinfo = stmt_info->vinfo;
530 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
531 if (!assign)
532 return 0;
534 tree_code rhs_code = gimple_assign_rhs_code (assign);
535 if (rhs_code != code && rhs_code != widened_code)
536 return 0;
538 tree type = gimple_expr_type (assign);
539 if (!INTEGRAL_TYPE_P (type))
540 return 0;
542 /* Assume that both operands will be leaf operands. */
543 max_nops -= 2;
545 /* Check the operands. */
546 unsigned int next_op = 0;
547 for (unsigned int i = 0; i < 2; ++i)
549 vect_unpromoted_value *this_unprom = &unprom[next_op];
550 unsigned int nops = 1;
551 tree op = gimple_op (assign, i + 1);
552 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
554 /* We already have a common type from earlier operands.
555 Update it to account for OP. */
556 this_unprom->set_op (op, vect_constant_def);
557 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
558 return 0;
560 else
562 /* Only allow shifts by constants. */
563 if (shift_p && i == 1)
564 return 0;
566 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
567 this_unprom))
568 return 0;
570 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
572 /* The operand isn't widened. If STMT_INFO has the code
573 for an unwidened operation, recursively check whether
574 this operand is a node of the tree. */
575 if (rhs_code != code
576 || max_nops == 0
577 || this_unprom->dt != vect_internal_def)
578 return 0;
580 /* Give back the leaf slot allocated above now that we're
581 not treating this as a leaf operand. */
582 max_nops += 1;
584 /* Recursively process the definition of the operand. */
585 stmt_vec_info def_stmt_info
586 = vinfo->lookup_def (this_unprom->op);
587 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
588 shift_p, max_nops, this_unprom,
589 common_type);
590 if (nops == 0)
591 return 0;
593 max_nops -= nops;
595 else
597 /* Make sure that the operand is narrower than the result. */
598 if (TYPE_PRECISION (this_unprom->type) * 2
599 > TYPE_PRECISION (type))
600 return 0;
602 /* Update COMMON_TYPE for the new operand. */
603 if (i == 0)
604 *common_type = this_unprom->type;
605 else if (!vect_joust_widened_type (type, this_unprom->type,
606 common_type))
607 return 0;
610 next_op += nops;
612 return next_op;
615 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
616 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
618 static tree
619 vect_recog_temp_ssa_var (tree type, gimple *stmt)
621 return make_temp_ssa_name (type, stmt, "patt");
624 /* STMT2_INFO describes a type conversion that could be split into STMT1
625 followed by a version of STMT2_INFO that takes NEW_RHS as its first
626 input. Try to do this using pattern statements, returning true on
627 success. */
629 static bool
630 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
631 gimple *stmt1, tree vectype)
633 if (is_pattern_stmt_p (stmt2_info))
635 /* STMT2_INFO is part of a pattern. Get the statement to which
636 the pattern is attached. */
637 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
638 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
640 if (dump_enabled_p ())
642 dump_printf_loc (MSG_NOTE, vect_location,
643 "Splitting pattern statement: ");
644 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
647 /* Since STMT2_INFO is a pattern statement, we can change it
648 in-situ without worrying about changing the code for the
649 containing block. */
650 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
652 if (dump_enabled_p ())
654 dump_printf_loc (MSG_NOTE, vect_location, "into: ");
655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
656 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
657 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
660 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
661 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
662 /* STMT2_INFO is the actual pattern statement. Add STMT1
663 to the end of the definition sequence. */
664 gimple_seq_add_stmt_without_update (def_seq, stmt1);
665 else
667 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
668 before it. */
669 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
670 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
672 return true;
674 else
676 /* STMT2_INFO doesn't yet have a pattern. Try to create a
677 two-statement pattern now. */
678 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
679 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
680 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
681 if (!lhs_vectype)
682 return false;
684 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE, vect_location,
687 "Splitting statement: ");
688 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
691 /* Add STMT1 as a singleton pattern definition sequence. */
692 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
693 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
694 gimple_seq_add_stmt_without_update (def_seq, stmt1);
696 /* Build the second of the two pattern statements. */
697 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
698 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
699 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
701 if (dump_enabled_p ())
703 dump_printf_loc (MSG_NOTE, vect_location,
704 "into pattern statements: ");
705 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
706 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
707 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0);
710 return true;
714 /* Convert UNPROM to TYPE and return the result, adding new statements
715 to STMT_INFO's pattern definition statements if no better way is
716 available. VECTYPE is the vector form of TYPE. */
718 static tree
719 vect_convert_input (stmt_vec_info stmt_info, tree type,
720 vect_unpromoted_value *unprom, tree vectype)
722 /* Check for a no-op conversion. */
723 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
724 return unprom->op;
726 /* Allow the caller to create constant vect_unpromoted_values. */
727 if (TREE_CODE (unprom->op) == INTEGER_CST)
728 return wide_int_to_tree (type, wi::to_widest (unprom->op));
730 /* See if we can reuse an existing result. */
731 if (unprom->caster)
733 tree lhs = gimple_get_lhs (unprom->caster->stmt);
734 if (types_compatible_p (TREE_TYPE (lhs), type))
735 return lhs;
738 /* We need a new conversion statement. */
739 tree new_op = vect_recog_temp_ssa_var (type, NULL);
740 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
742 /* If the operation is the input to a vectorizable cast, try splitting
743 that cast into two, taking the required result as a mid-way point. */
744 if (unprom->caster)
746 tree lhs = gimple_get_lhs (unprom->caster->stmt);
747 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
748 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
749 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
750 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
751 return new_op;
754 /* If OP is an external value, see if we can insert the new statement
755 on an incoming edge. */
756 if (unprom->dt == vect_external_def)
757 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
759 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
760 gcc_assert (!new_bb);
761 return new_op;
764 /* As a (common) last resort, add the statement to the pattern itself. */
765 append_pattern_def_seq (stmt_info, new_stmt, vectype);
766 return new_op;
769 /* Invoke vect_convert_input for N elements of UNPROM and store the
770 result in the corresponding elements of RESULT. */
772 static void
773 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
774 tree *result, tree type, vect_unpromoted_value *unprom,
775 tree vectype)
777 for (unsigned int i = 0; i < n; ++i)
779 unsigned int j;
780 for (j = 0; j < i; ++j)
781 if (unprom[j].op == unprom[i].op)
782 break;
783 if (j < i)
784 result[i] = result[j];
785 else
786 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
790 /* The caller has created a (possibly empty) sequence of pattern definition
791 statements followed by a single statement PATTERN_STMT. Cast the result
792 of this final statement to TYPE. If a new statement is needed, add
793 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
794 and return the new statement, otherwise return PATTERN_STMT as-is.
795 VECITYPE is the vector form of PATTERN_STMT's result type. */
797 static gimple *
798 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
799 tree vecitype)
801 tree lhs = gimple_get_lhs (pattern_stmt);
802 if (!types_compatible_p (type, TREE_TYPE (lhs)))
804 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
805 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
806 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
808 return pattern_stmt;
811 /* Return true if STMT_VINFO describes a reduction for which reassociation
812 is allowed. If STMT_VINFO is part of a group, assume that it's part of
813 a reduction chain and optimistically assume that all statements
814 except the last allow reassociation. */
816 static bool
817 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
819 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
820 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
821 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL_STMT_VEC_INFO);
824 /* As above, but also require it to have code CODE and to be a reduction
825 in the outermost loop. When returning true, store the operands in
826 *OP0_OUT and *OP1_OUT. */
828 static bool
829 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
830 tree *op0_out, tree *op1_out)
832 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
833 if (!loop_info)
834 return false;
836 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
837 if (!assign || gimple_assign_rhs_code (assign) != code)
838 return false;
840 /* We don't allow changing the order of the computation in the inner-loop
841 when doing outer-loop vectorization. */
842 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
843 if (loop && nested_in_vect_loop_p (loop, stmt_info))
844 return false;
846 if (!vect_reassociating_reduction_p (stmt_info))
847 return false;
849 *op0_out = gimple_assign_rhs1 (assign);
850 *op1_out = gimple_assign_rhs2 (assign);
851 return true;
854 /* Function vect_recog_dot_prod_pattern
856 Try to find the following pattern:
858 type x_t, y_t;
859 TYPE1 prod;
860 TYPE2 sum = init;
861 loop:
862 sum_0 = phi <init, sum_1>
863 S1 x_t = ...
864 S2 y_t = ...
865 S3 x_T = (TYPE1) x_t;
866 S4 y_T = (TYPE1) y_t;
867 S5 prod = x_T * y_T;
868 [S6 prod = (TYPE2) prod; #optional]
869 S7 sum_1 = prod + sum_0;
871 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
872 same size as 'TYPE1' or bigger. This is a special case of a reduction
873 computation.
875 Input:
877 * STMT_VINFO: The stmt from which the pattern search begins. In the
878 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
879 will be detected.
881 Output:
883 * TYPE_OUT: The type of the output of this pattern.
885 * Return value: A new stmt that will be used to replace the sequence of
886 stmts that constitute the pattern. In this case it will be:
887 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
889 Note: The dot-prod idiom is a widening reduction pattern that is
890 vectorized without preserving all the intermediate results. It
891 produces only N/2 (widened) results (by summing up pairs of
892 intermediate results) rather than all N results. Therefore, we
893 cannot allow this pattern when we want to get all the results and in
894 the correct order (as is the case when this computation is in an
895 inner-loop nested in an outer-loop that is being vectorized). */
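/* An illustrative source loop that this pattern is intended to match
   (hypothetical example, not from the original comment):

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += (int) a[i] * (int) b[i];

   Here 'type' is short, TYPE1 and TYPE2 are int, and the reduction can be
   implemented with DOT_PROD_EXPR on the short inputs.  */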
897 static gimple *
898 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
900 tree oprnd0, oprnd1;
901 gimple *last_stmt = stmt_vinfo->stmt;
902 vec_info *vinfo = stmt_vinfo->vinfo;
903 tree type, half_type;
904 gimple *pattern_stmt;
905 tree var;
907 /* Look for the following pattern
908 DX = (TYPE1) X;
909 DY = (TYPE1) Y;
910 DPROD = DX * DY;
911 DDPROD = (TYPE2) DPROD;
912 sum_1 = DDPROD + sum_0;
913 In which
914 - DX is double the size of X
915 - DY is double the size of Y
916 - DX, DY, DPROD all have the same type
917 - sum is the same size as DPROD or bigger
918 - sum has been recognized as a reduction variable.
920 This is equivalent to:
921 DPROD = X w* Y; #widen mult
922 sum_1 = DPROD w+ sum_0; #widen summation
924 DPROD = X w* Y; #widen mult
925 sum_1 = DPROD + sum_0; #summation
928 /* Starting from LAST_STMT, follow the defs of its uses in search
929 of the above pattern. */
931 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
932 &oprnd0, &oprnd1))
933 return NULL;
935 type = gimple_expr_type (last_stmt);
937 vect_unpromoted_value unprom_mult;
938 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
940 /* So far so good. Since last_stmt was detected as a (summation) reduction,
941 we know that oprnd1 is the reduction variable (defined by a loop-header
942 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
943 It remains to check that oprnd0 is defined by a (widen_)mult_expr. */
944 if (!oprnd0)
945 return NULL;
947 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
948 if (!mult_vinfo)
949 return NULL;
951 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
952 inside the loop (in case we are analyzing an outer-loop). */
953 vect_unpromoted_value unprom0[2];
954 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
955 false, 2, unprom0, &half_type))
956 return NULL;
958 /* If there are two widening operations, make sure they agree on
959 the sign of the extension. */
960 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
961 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
962 return NULL;
964 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
966 tree half_vectype;
967 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
968 type_out, &half_vectype))
969 return NULL;
971 /* Get the inputs in the appropriate types. */
972 tree mult_oprnd[2];
973 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
974 unprom0, half_vectype);
976 var = vect_recog_temp_ssa_var (type, NULL);
977 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
978 mult_oprnd[0], mult_oprnd[1], oprnd1);
980 return pattern_stmt;
984 /* Function vect_recog_sad_pattern
986 Try to find the following Sum of Absolute Difference (SAD) pattern:
988 type x_t, y_t;
989 signed TYPE1 diff, abs_diff;
990 TYPE2 sum = init;
991 loop:
992 sum_0 = phi <init, sum_1>
993 S1 x_t = ...
994 S2 y_t = ...
995 S3 x_T = (TYPE1) x_t;
996 S4 y_T = (TYPE1) y_t;
997 S5 diff = x_T - y_T;
998 S6 abs_diff = ABS_EXPR <diff>;
999 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1000 S8 sum_1 = abs_diff + sum_0;
1002 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1003 same size as 'TYPE1' or bigger. This is a special case of a reduction
1004 computation.
1006 Input:
1008 * STMT_VINFO: The stmt from which the pattern search begins. In the
1009 example, when this function is called with S8, the pattern
1010 {S3,S4,S5,S6,S7,S8} will be detected.
1012 Output:
1014 * TYPE_OUT: The type of the output of this pattern.
1016 * Return value: A new stmt that will be used to replace the sequence of
1017 stmts that constitute the pattern. In this case it will be:
1018 SAD_EXPR <x_t, y_t, sum_0>
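/* An illustrative source loop that this pattern is intended to match
   (hypothetical example, not from the original comment):

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs ((int) a[i] - (int) b[i]);

   Here the subtraction and ABS_EXPR are performed in int, and the whole
   reduction step can be implemented with SAD_EXPR on the char inputs.  */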
1021 static gimple *
1022 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1024 gimple *last_stmt = stmt_vinfo->stmt;
1025 vec_info *vinfo = stmt_vinfo->vinfo;
1026 tree half_type;
1028 /* Look for the following pattern
1029 DX = (TYPE1) X;
1030 DY = (TYPE1) Y;
1031 DDIFF = DX - DY;
1032 DAD = ABS_EXPR <DDIFF>;
1033 DAD = (TYPE2) DAD; #optional
1034 sum_1 = DAD + sum_0;
1035 In which
1036 - DX is at least double the size of X
1037 - DY is at least double the size of Y
1038 - DX, DY, DDIFF, DAD all have the same type
1039 - sum is the same size as DAD or bigger
1040 - sum has been recognized as a reduction variable.
1042 This is equivalent to:
1043 DDIFF = X w- Y; #widen sub
1044 DAD = ABS_EXPR <DDIFF>;
1045 sum_1 = DAD w+ sum_0; #widen summation
1047 DDIFF = X w- Y; #widen sub
1048 DAD = ABS_EXPR <DDIFF>;
1049 sum_1 = DAD + sum_0; #summation
1052 /* Starting from LAST_STMT, follow the defs of its uses in search
1053 of the above pattern. */
1055 tree plus_oprnd0, plus_oprnd1;
1056 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1057 &plus_oprnd0, &plus_oprnd1))
1058 return NULL;
1060 tree sum_type = gimple_expr_type (last_stmt);
1062 /* Any non-truncating sequence of conversions is OK here, since
1063 with a successful match, the result of the ABS(U) is known to fit
1064 within the nonnegative range of the result type. (It cannot be the
1065 negative of the minimum signed value due to the range of the widening
1066 MINUS_EXPR.) */
1067 vect_unpromoted_value unprom_abs;
1068 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1069 &unprom_abs);
1071 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1072 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1073 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1074 Then check that plus_oprnd0 is defined by an abs_expr. */
1076 if (!plus_oprnd0)
1077 return NULL;
1079 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1080 if (!abs_stmt_vinfo)
1081 return NULL;
1083 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1084 inside the loop (in case we are analyzing an outer-loop). */
1085 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1086 if (!abs_stmt
1087 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1088 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1089 return NULL;
1091 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1092 tree abs_type = TREE_TYPE (abs_oprnd);
1093 if (TYPE_UNSIGNED (abs_type))
1094 return NULL;
1096 /* Peel off conversions from the ABS input. This can involve sign
1097 changes (e.g. from an unsigned subtraction to a signed ABS input)
1098 or signed promotion, but it can't include unsigned promotion.
1099 (Note that ABS of an unsigned promotion should have been folded
1100 away before now anyway.) */
1101 vect_unpromoted_value unprom_diff;
1102 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1103 &unprom_diff);
1104 if (!abs_oprnd)
1105 return NULL;
1106 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1107 && TYPE_UNSIGNED (unprom_diff.type))
1108 return NULL;
1110 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1111 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1112 if (!diff_stmt_vinfo)
1113 return NULL;
1115 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1116 inside the loop (in case we are analyzing an outer-loop). */
1117 vect_unpromoted_value unprom[2];
1118 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1119 false, 2, unprom, &half_type))
1120 return NULL;
1122 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1124 tree half_vectype;
1125 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1126 type_out, &half_vectype))
1127 return NULL;
1129 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1130 tree sad_oprnd[2];
1131 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1132 unprom, half_vectype);
1134 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1135 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1136 sad_oprnd[1], plus_oprnd1);
1138 return pattern_stmt;
1141 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1142 so that it can be treated as though it had the form:
1144 A_TYPE a;
1145 B_TYPE b;
1146 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1147 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1148 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1149 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1150 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1152 Try to replace the pattern with:
1154 A_TYPE a;
1155 B_TYPE b;
1156 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1157 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1158 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1159 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1161 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1163 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1164 name of the pattern being matched, for dump purposes. */
1166 static gimple *
1167 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1168 tree_code orig_code, tree_code wide_code,
1169 bool shift_p, const char *name)
1171 gimple *last_stmt = last_stmt_info->stmt;
1173 vect_unpromoted_value unprom[2];
1174 tree half_type;
1175 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1176 shift_p, 2, unprom, &half_type))
1177 return NULL;
1179 /* Pattern detected. */
1180 vect_pattern_detected (name, last_stmt);
1182 tree type = gimple_expr_type (last_stmt);
1183 tree itype = type;
1184 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1185 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1186 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1187 TYPE_UNSIGNED (half_type));
1189 /* Check target support */
1190 tree vectype = get_vectype_for_scalar_type (half_type);
1191 tree vecitype = get_vectype_for_scalar_type (itype);
1192 enum tree_code dummy_code;
1193 int dummy_int;
1194 auto_vec<tree> dummy_vec;
1195 if (!vectype
1196 || !vecitype
1197 || !supportable_widening_operation (wide_code, last_stmt_info,
1198 vecitype, vectype,
1199 &dummy_code, &dummy_code,
1200 &dummy_int, &dummy_vec))
1201 return NULL;
1203 *type_out = get_vectype_for_scalar_type (type);
1204 if (!*type_out)
1205 return NULL;
1207 tree oprnd[2];
1208 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1210 tree var = vect_recog_temp_ssa_var (itype, NULL);
1211 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1212 oprnd[0], oprnd[1]);
1214 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1217 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1218 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
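/* For example (illustrative):

     short a, b;
     int prod = (int) a * (int) b;

   can be done as a single WIDEN_MULT_EXPR on the short operands, producing
   the int result directly.  */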
1220 static gimple *
1221 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1223 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1224 WIDEN_MULT_EXPR, false,
1225 "vect_recog_widen_mult_pattern");
1228 /* Function vect_recog_pow_pattern
1230 Try to find the following pattern:
1232 x = POW (y, N);
1234 with POW being one of pow, powf, powi, powif and N being
1235 either 2 or 0.5.
1237 Input:
1239 * STMT_VINFO: The stmt from which the pattern search begins.
1241 Output:
1243 * TYPE_OUT: The type of the output of this pattern.
1245 * Return value: A new stmt that will be used to replace the sequence of
1246 stmts that constitute the pattern. In this case it will be:
1247 x = x * x
1249 x = sqrt (x)
1252 static gimple *
1253 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1255 gimple *last_stmt = stmt_vinfo->stmt;
1256 tree base, exp;
1257 gimple *stmt;
1258 tree var;
1260 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1261 return NULL;
1263 switch (gimple_call_combined_fn (last_stmt))
1265 CASE_CFN_POW:
1266 CASE_CFN_POWI:
1267 break;
1269 default:
1270 return NULL;
1273 base = gimple_call_arg (last_stmt, 0);
1274 exp = gimple_call_arg (last_stmt, 1);
1275 if (TREE_CODE (exp) != REAL_CST
1276 && TREE_CODE (exp) != INTEGER_CST)
1278 if (flag_unsafe_math_optimizations
1279 && TREE_CODE (base) == REAL_CST
1280 && !gimple_call_internal_p (last_stmt))
1282 combined_fn log_cfn;
1283 built_in_function exp_bfn;
1284 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1286 case BUILT_IN_POW:
1287 log_cfn = CFN_BUILT_IN_LOG;
1288 exp_bfn = BUILT_IN_EXP;
1289 break;
1290 case BUILT_IN_POWF:
1291 log_cfn = CFN_BUILT_IN_LOGF;
1292 exp_bfn = BUILT_IN_EXPF;
1293 break;
1294 case BUILT_IN_POWL:
1295 log_cfn = CFN_BUILT_IN_LOGL;
1296 exp_bfn = BUILT_IN_EXPL;
1297 break;
1298 default:
1299 return NULL;
1301 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1302 tree exp_decl = builtin_decl_implicit (exp_bfn);
1303 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1304 does that, but if C is a power of 2, we want to use
1305 exp2 (log2 (C) * x) in the non-vectorized version, but for
1306 vectorization we don't have vectorized exp2. */
1307 if (logc
1308 && TREE_CODE (logc) == REAL_CST
1309 && exp_decl
1310 && lookup_attribute ("omp declare simd",
1311 DECL_ATTRIBUTES (exp_decl)))
1313 cgraph_node *node = cgraph_node::get_create (exp_decl);
1314 if (node->simd_clones == NULL)
1316 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1317 || node->definition)
1318 return NULL;
1319 expand_simd_clones (node);
1320 if (node->simd_clones == NULL)
1321 return NULL;
1323 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1324 if (!*type_out)
1325 return NULL;
1326 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1327 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1328 append_pattern_def_seq (stmt_vinfo, g);
1329 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1330 g = gimple_build_call (exp_decl, 1, def);
1331 gimple_call_set_lhs (g, res);
1332 return g;
1336 return NULL;
1339 /* We now have a pow or powi builtin function call with a constant
1340 exponent. */
1342 /* Catch squaring. */
1343 if ((tree_fits_shwi_p (exp)
1344 && tree_to_shwi (exp) == 2)
1345 || (TREE_CODE (exp) == REAL_CST
1346 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1348 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1349 TREE_TYPE (base), type_out))
1350 return NULL;
1352 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1353 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1354 return stmt;
1357 /* Catch square root. */
1358 if (TREE_CODE (exp) == REAL_CST
1359 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1361 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1362 if (*type_out
1363 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1364 OPTIMIZE_FOR_SPEED))
1366 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1367 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1368 gimple_call_set_lhs (stmt, var);
1369 gimple_call_set_nothrow (stmt, true);
1370 return stmt;
1374 return NULL;
1378 /* Function vect_recog_widen_sum_pattern
1380 Try to find the following pattern:
1382 type x_t;
1383 TYPE x_T, sum = init;
1384 loop:
1385 sum_0 = phi <init, sum_1>
1386 S1 x_t = *p;
1387 S2 x_T = (TYPE) x_t;
1388 S3 sum_1 = x_T + sum_0;
1390 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1391 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1392 a special case of a reduction computation.
1394 Input:
1396 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1397 when this function is called with S3, the pattern {S2,S3} will be detected.
1399 Output:
1401 * TYPE_OUT: The type of the output of this pattern.
1403 * Return value: A new stmt that will be used to replace the sequence of
1404 stmts that constitute the pattern. In this case it will be:
1405 WIDEN_SUM <x_t, sum_0>
1407 Note: The widening-sum idiom is a widening reduction pattern that is
1408 vectorized without preserving all the intermediate results. It
1409 produces only N/2 (widened) results (by summing up pairs of
1410 intermediate results) rather than all N results. Therefore, we
1411 cannot allow this pattern when we want to get all the results and in
1412 the correct order (as is the case when this computation is in an
1413 inner-loop nested in an outer-loop that is being vectorized). */
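/* An illustrative source loop for this pattern (hypothetical example):

     short a[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += (int) a[i];

   which can be implemented with WIDEN_SUM <a[i], sum> on the short
   elements.  */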
1415 static gimple *
1416 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1418 gimple *last_stmt = stmt_vinfo->stmt;
1419 tree oprnd0, oprnd1;
1420 vec_info *vinfo = stmt_vinfo->vinfo;
1421 tree type;
1422 gimple *pattern_stmt;
1423 tree var;
1425 /* Look for the following pattern
1426 DX = (TYPE) X;
1427 sum_1 = DX + sum_0;
1428 In which DX is at least double the size of X, and sum_1 has been
1429 recognized as a reduction variable.
1432 /* Starting from LAST_STMT, follow the defs of its uses in search
1433 of the above pattern. */
1435 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1436 &oprnd0, &oprnd1))
1437 return NULL;
1439 type = gimple_expr_type (last_stmt);
1441 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1442 we know that oprnd1 is the reduction variable (defined by a loop-header
1443 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1444 Left to check that oprnd0 is defined by a cast from type 'type' to type
1445 'TYPE'. */
1447 vect_unpromoted_value unprom0;
1448 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1449 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1450 return NULL;
1452 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1454 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1455 type_out))
1456 return NULL;
1458 var = vect_recog_temp_ssa_var (type, NULL);
1459 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1461 return pattern_stmt;
1464 /* Recognize cases in which an operation is performed in one type WTYPE
1465 but could be done more efficiently in a narrower type NTYPE. For example,
1466 if we have:
1468 ATYPE a; // narrower than NTYPE
1469 BTYPE b; // narrower than NTYPE
1470 WTYPE aw = (WTYPE) a;
1471 WTYPE bw = (WTYPE) b;
1472 WTYPE res = aw + bw; // only uses of aw and bw
1474 then it would be more efficient to do:
1476 NTYPE an = (NTYPE) a;
1477 NTYPE bn = (NTYPE) b;
1478 NTYPE resn = an + bn;
1479 WTYPE res = (WTYPE) resn;
1481 Other situations include things like:
1483 ATYPE a; // NTYPE or narrower
1484 WTYPE aw = (WTYPE) a;
1485 WTYPE res = aw + b;
1487 when only "(NTYPE) res" is significant. In that case it's more efficient
1488 to truncate "b" and do the operation on NTYPE instead:
1490 NTYPE an = (NTYPE) a;
1491 NTYPE bn = (NTYPE) b; // truncation
1492 NTYPE resn = an + bn;
1493 WTYPE res = (WTYPE) resn;
1495 All users of "res" should then use "resn" instead, making the final
1496 statement dead (not marked as relevant). The final statement is still
1497 needed to maintain the type correctness of the IR.
1499 vect_determine_precisions has already determined the minimum
1500 precision of the operation and the minimum precision required
1501 by users of the result. */
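/* A concrete instance of the first example above (illustrative):

     unsigned char a[N], b[N], res[N];
     ...
     res[i] = ((int) a[i] + (int) b[i]) >> 2;

   Only the low 8 bits of the int result are stored, so the addition and
   shift can be performed in a 16-bit type instead, with a final cast
   supplying the int value that the IR requires.  */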
1503 static gimple *
1504 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1506 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1507 if (!last_stmt)
1508 return NULL;
1510 /* See whether we have found that this operation can be done on a
1511 narrower type without changing its semantics. */
1512 unsigned int new_precision = last_stmt_info->operation_precision;
1513 if (!new_precision)
1514 return NULL;
1516 vec_info *vinfo = last_stmt_info->vinfo;
1517 tree lhs = gimple_assign_lhs (last_stmt);
1518 tree type = TREE_TYPE (lhs);
1519 tree_code code = gimple_assign_rhs_code (last_stmt);
1521 /* Keep the first operand of a COND_EXPR as-is: only the other two
1522 operands are interesting. */
1523 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1525 /* Check the operands. */
1526 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1527 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1528 unprom.quick_grow (nops);
1529 unsigned int min_precision = 0;
1530 bool single_use_p = false;
1531 for (unsigned int i = 0; i < nops; ++i)
1533 tree op = gimple_op (last_stmt, first_op + i);
1534 if (TREE_CODE (op) == INTEGER_CST)
1535 unprom[i].set_op (op, vect_constant_def);
1536 else if (TREE_CODE (op) == SSA_NAME)
1538 bool op_single_use_p = true;
1539 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1540 &op_single_use_p))
1541 return NULL;
1542 /* If:
1544 (1) N bits of the result are needed;
1545 (2) all inputs are widened from M<N bits; and
1546 (3) one operand OP is a single-use SSA name
1548 we can shift the M->N widening from OP to the output
1549 without changing the number or type of extensions involved.
1550 This then reduces the number of copies of STMT_INFO.
1552 If instead of (3) more than one operand is a single-use SSA name,
1553 shifting the extension to the output is even more of a win.
1555 If instead:
1557 (1) N bits of the result are needed;
1558 (2) one operand OP2 is widened from M2<N bits;
1559 (3) another operand OP1 is widened from M1<M2 bits; and
1560 (4) both OP1 and OP2 are single-use
1562 the choice is between:
1564 (a) truncating OP2 to M1, doing the operation on M1,
1565 and then widening the result to N
1567 (b) widening OP1 to M2, doing the operation on M2, and then
1568 widening the result to N
1570 Both shift the M2->N widening of the inputs to the output.
1571 (a) additionally shifts the M1->M2 widening to the output;
1572 it requires fewer copies of STMT_INFO but requires an extra
1573 M2->M1 truncation.
1575 Which is better will depend on the complexity and cost of
1576 STMT_INFO, which is hard to predict at this stage. However,
1577 a clear tie-breaker in favor of (b) is the fact that the
1578 truncation in (a) increases the length of the operation chain.
1580 If instead of (4) only one of OP1 or OP2 is single-use,
1581 (b) is still a win over doing the operation in N bits:
1582 it still shifts the M2->N widening on the single-use operand
1583 to the output and reduces the number of STMT_INFO copies.
1585 If neither operand is single-use then operating on fewer than
1586 N bits might lead to more extensions overall. Whether it does
1587 or not depends on global information about the vectorization
1588 region, and whether that's a good trade-off would again
1589 depend on the complexity and cost of the statements involved,
1590 as well as things like register pressure that are not normally
1591 modelled at this stage. We therefore ignore these cases
1592 and just optimize the clear single-use wins above.
1594 Thus we take the maximum precision of the unpromoted operands
1595 and record whether any operand is single-use. */
1596 if (unprom[i].dt == vect_internal_def)
1598 min_precision = MAX (min_precision,
1599 TYPE_PRECISION (unprom[i].type));
1600 single_use_p |= op_single_use_p;
1605 /* Although the operation could be done in operation_precision, we have
1606 to balance that against introducing extra truncations or extensions.
1607 Calculate the minimum precision that can be handled efficiently.
1609 The loop above determined that the operation could be handled
1610 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1611 extension from the inputs to the output without introducing more
1612 instructions, and would reduce the number of instructions required
1613 for STMT_INFO itself.
1615 vect_determine_precisions has also determined that the result only
1616 needs min_output_precision bits. Truncating by a factor of N
1617 requires a tree of N - 1 instructions, so if TYPE is N times wider
1618 than min_output_precision, doing the operation in TYPE and truncating
1619 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1620 In contrast:
1622 - truncating the input to a unary operation and doing the operation
1623 in the new type requires at most N - 1 + 1 = N instructions per
1624 output vector
1626 - doing the same for a binary operation requires at most
1627 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1629 Both unary and binary operations require fewer instructions than
1630 this if the operands were extended from a suitable truncated form.
1631 Thus there is usually nothing to lose by doing operations in
1632 min_output_precision bits, but there can be something to gain. */
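/* Worked example of the counts above (illustrative): if TYPE is 4 times
   wider than min_output_precision (N = 4), doing the operation in TYPE and
   truncating costs 4 + 3 = 7 instructions per output vector, truncating the
   input of a unary operation first costs at most 3 + 1 = 4, and doing the
   same for a binary operation costs at most 3 * 2 + 1 = 7, so operating in
   min_output_precision bits never loses and often wins.  */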
1633 if (!single_use_p)
1634 min_precision = last_stmt_info->min_output_precision;
1635 else
1636 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1638 /* Apply the minimum efficient precision we just calculated. */
1639 if (new_precision < min_precision)
1640 new_precision = min_precision;
1641 if (new_precision >= TYPE_PRECISION (type))
1642 return NULL;
1644 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1646 *type_out = get_vectype_for_scalar_type (type);
1647 if (!*type_out)
1648 return NULL;
1650 /* We've found a viable pattern. Get the new type of the operation. */
1651 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1652 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1654 /* We specifically don't check here whether the target supports the
1655 new operation, since it might be something that a later pattern
1656 wants to rewrite anyway. If targets have a minimum element size
1657 for some optabs, we should pattern-match smaller ops to larger ops
1658 where beneficial. */
1659 tree new_vectype = get_vectype_for_scalar_type (new_type);
1660 if (!new_vectype)
1661 return NULL;
1663 if (dump_enabled_p ())
1665 dump_printf_loc (MSG_NOTE, vect_location, "demoting ");
1666 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
1667 dump_printf (MSG_NOTE, " to ");
1668 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_type);
1669 dump_printf (MSG_NOTE, "\n");
1672 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1673 tree ops[3] = {};
1674 for (unsigned int i = 1; i < first_op; ++i)
1675 ops[i - 1] = gimple_op (last_stmt, i);
1676 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1677 new_type, &unprom[0], new_vectype);
1679 /* Use the operation to produce a result of type NEW_TYPE. */
1680 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1681 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1682 ops[0], ops[1], ops[2]);
1683 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1685 if (dump_enabled_p ())
1687 dump_printf_loc (MSG_NOTE, vect_location,
1688 "created pattern stmt: ");
1689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1692 pattern_stmt = vect_convert_output (last_stmt_info, type,
1693 pattern_stmt, new_vectype);
1695 return pattern_stmt;
1698 /* Recognize the patterns:
1700 ATYPE a; // narrower than TYPE
1701 BTYPE b; // narrower than TYPE
1702 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1703 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1705 where only the bottom half of avg is used. Try to transform them into:
1707 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1708 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1710 followed by:
1712 TYPE avg = (TYPE) avg';
1714 where NTYPE is no wider than half of TYPE. Since only the bottom half
1715 of avg is used, all or part of the cast of avg' should become redundant. */
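/* For illustration, a scalar loop of the following shape is the typical
   source of this pattern, assuming the target provides IFN_AVG_CEIL for
   the narrow vector type (the function and array names below are only
   placeholders):

     void
     avg_round_u8 (unsigned char *restrict r, unsigned char *restrict a,
		   unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = (a[i] + b[i] + 1) >> 1;	  // case (2): rounding average
     }

   The additions happen in int, but only the low 8 bits of the shift
   result are stored, so the whole computation can use an 8-bit
   .AVG_CEIL.  */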
1717 static gimple *
1718 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1720 /* Check for a shift right by one bit. */
1721 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1722 vec_info *vinfo = last_stmt_info->vinfo;
1723 if (!last_stmt
1724 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1725 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1726 return NULL;
1728 /* Check that the shift result is wider than the users of the
1729 result need (i.e. that narrowing would be a natural choice). */
1730 tree lhs = gimple_assign_lhs (last_stmt);
1731 tree type = TREE_TYPE (lhs);
1732 unsigned int target_precision
1733 = vect_element_precision (last_stmt_info->min_output_precision);
1734 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1735 return NULL;
1737 /* Get the definition of the shift input. */
1738 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1739 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1740 if (!plus_stmt_info)
1741 return NULL;
1743 /* Check whether the shift input can be seen as a tree of additions on
1744 2 or 3 widened inputs.
1746 Note that the pattern should be a win even if the result of one or
1747 more additions is reused elsewhere: if the pattern matches, we'd be
1748 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1749 internal_fn ifn = IFN_AVG_FLOOR;
1750 vect_unpromoted_value unprom[3];
1751 tree new_type;
1752 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1753 PLUS_EXPR, false, 3,
1754 unprom, &new_type);
1755 if (nops == 0)
1756 return NULL;
1757 if (nops == 3)
1759 /* Check that one operand is 1. */
1760 unsigned int i;
1761 for (i = 0; i < 3; ++i)
1762 if (integer_onep (unprom[i].op))
1763 break;
1764 if (i == 3)
1765 return NULL;
1766 /* Throw away the 1 operand and keep the other two. */
1767 if (i < 2)
1768 unprom[i] = unprom[2];
1769 ifn = IFN_AVG_CEIL;
1772 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1774 /* We know that:
1776 (a) the operation can be viewed as:
1778 TYPE widened0 = (TYPE) UNPROM[0];
1779 TYPE widened1 = (TYPE) UNPROM[1];
1780 TYPE tmp1 = widened0 + widened1 {+ 1};
1781 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1783 (b) the first two statements are equivalent to:
1785 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1786 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1788 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1789 where sensible;
1791 (d) all the operations can be performed correctly at twice the width of
1792 NEW_TYPE, due to the nature of the average operation; and
1794 (e) users of the result of the right shift need only TARGET_PRECISION
1795 bits, where TARGET_PRECISION is no more than half of TYPE's
1796 precision.
1798 Under these circumstances, the only situation in which NEW_TYPE
1799 could be narrower than TARGET_PRECISION is if widened0, widened1
1800 and an addition result are all used more than once. Thus we can
1801 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1802 as "free", whereas widening the result of the average instruction
1803 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1804 therefore better not to go narrower than TARGET_PRECISION. */
1805 if (TYPE_PRECISION (new_type) < target_precision)
1806 new_type = build_nonstandard_integer_type (target_precision,
1807 TYPE_UNSIGNED (new_type));
1809 /* Check for target support. */
1810 tree new_vectype = get_vectype_for_scalar_type (new_type);
1811 if (!new_vectype
1812 || !direct_internal_fn_supported_p (ifn, new_vectype,
1813 OPTIMIZE_FOR_SPEED))
1814 return NULL;
1816 /* The IR requires a valid vector type for the cast result, even though
1817 it's likely to be discarded. */
1818 *type_out = get_vectype_for_scalar_type (type);
1819 if (!*type_out)
1820 return NULL;
1822 /* Generate the IFN_AVG* call. */
1823 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1824 tree new_ops[2];
1825 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1826 unprom, new_vectype);
1827 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1828 new_ops[1]);
1829 gimple_call_set_lhs (average_stmt, new_var);
1830 gimple_set_location (average_stmt, gimple_location (last_stmt));
1832 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_NOTE, vect_location,
1835 "created pattern stmt: ");
1836 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
1839 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1842 /* Recognize cases in which the input to a cast is wider than its
1843 output, and the input is fed by a widening operation. Fold this
1844 by removing the unnecessary intermediate widening. E.g.:
1846 unsigned char a;
1847 unsigned int b = (unsigned int) a;
1848 unsigned short c = (unsigned short) b;
1852 unsigned short c = (unsigned short) a;
1854 Although this is rare in input IR, it is an expected side-effect
1855 of the over-widening pattern above.
1857 This is beneficial also for integer-to-float conversions, if the
1858 widened integer has more bits than the float, and if the unwidened
1859 input doesn't. */
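/* For illustration, the integer-to-float case mentioned above looks
   like this (a sketch; the exact IR depends on earlier patterns):

     unsigned short a;
     unsigned long long b = (unsigned long long) a;
     float c = (float) b;

   which can become

     float c = (float) a;

   because the 64-bit integer has more bits than float's significand
   while the 16-bit input does not, so both conversions produce the
   same value.  */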
1861 static gimple *
1862 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1864 /* Check for a cast, including an integer-to-float conversion. */
1865 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1866 if (!last_stmt)
1867 return NULL;
1868 tree_code code = gimple_assign_rhs_code (last_stmt);
1869 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1870 return NULL;
1872 /* Make sure that the rhs is a scalar with a natural bitsize. */
1873 tree lhs = gimple_assign_lhs (last_stmt);
1874 if (!lhs)
1875 return NULL;
1876 tree lhs_type = TREE_TYPE (lhs);
1877 scalar_mode lhs_mode;
1878 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1879 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1880 return NULL;
1882 /* Check for a narrowing operation (from a vector point of view). */
1883 tree rhs = gimple_assign_rhs1 (last_stmt);
1884 tree rhs_type = TREE_TYPE (rhs);
1885 if (!INTEGRAL_TYPE_P (rhs_type)
1886 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1887 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1888 return NULL;
1890 /* Try to find an unpromoted input. */
1891 vec_info *vinfo = last_stmt_info->vinfo;
1892 vect_unpromoted_value unprom;
1893 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1894 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1895 return NULL;
1897 /* If the bits above RHS_TYPE matter, make sure that they're the
1898 same when extending from UNPROM as they are when extending from RHS. */
1899 if (!INTEGRAL_TYPE_P (lhs_type)
1900 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1901 return NULL;
1903 /* We can get the same result by casting UNPROM directly, to avoid
1904 the unnecessary widening and narrowing. */
1905 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1907 *type_out = get_vectype_for_scalar_type (lhs_type);
1908 if (!*type_out)
1909 return NULL;
1911 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1912 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1913 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1915 return pattern_stmt;
1918 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1919 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
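/* For illustration, a sketch of a loop this matches on targets with a
   widening vector shift (names are placeholders):

     void
     shift_u16_to_u32 (unsigned int *restrict r,
		       unsigned short *restrict a, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = (unsigned int) a[i] << 8;   // widen + shift in one op
     }

   Each 16-bit element is widened and shifted by a single
   WIDEN_LSHIFT_EXPR instead of a separate widening plus shift.  */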
1921 static gimple *
1922 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1924 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1925 WIDEN_LSHIFT_EXPR, true,
1926 "vect_recog_widen_shift_pattern");
1929 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1931 type a_t, b_t, c_t;
1933 S0 a_t = b_t r<< c_t;
1935 Input/Output:
1937 * STMT_VINFO: The stmt from which the pattern search begins,
1938 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1939 with a sequence:
1941 S1 d_t = -c_t;
1942 S2 e_t = d_t & (B - 1);
1943 S3 f_t = b_t << c_t;
1944 S4 g_t = b_t >> e_t;
1945 S0 a_t = f_t | g_t;
1947 where B is the element bitsize of the type.
1949 Output:
1951 * TYPE_OUT: The type of the output of this pattern.
1953 * Return value: A new stmt that will be used to replace the rotate
1954 S0 stmt. */
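/* For illustration, rotates usually reach GIMPLE from the familiar
   shift-or idiom, which earlier folding turns into an r<< operation
   (a sketch; names are placeholders):

     unsigned int
     rotl (unsigned int x, unsigned int n)
     {
       n &= 31;
       return (x << n) | (x >> ((-n) & 31));   // folded to x r<< n
     }

   If the target has no suitable vector rotate but does have the vector
   shifts, the S1-S4/S0 sequence above is emitted instead.  */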
1956 static gimple *
1957 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1959 gimple *last_stmt = stmt_vinfo->stmt;
1960 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1961 gimple *pattern_stmt, *def_stmt;
1962 enum tree_code rhs_code;
1963 vec_info *vinfo = stmt_vinfo->vinfo;
1964 enum vect_def_type dt;
1965 optab optab1, optab2;
1966 edge ext_def = NULL;
1968 if (!is_gimple_assign (last_stmt))
1969 return NULL;
1971 rhs_code = gimple_assign_rhs_code (last_stmt);
1972 switch (rhs_code)
1974 case LROTATE_EXPR:
1975 case RROTATE_EXPR:
1976 break;
1977 default:
1978 return NULL;
1981 lhs = gimple_assign_lhs (last_stmt);
1982 oprnd0 = gimple_assign_rhs1 (last_stmt);
1983 type = TREE_TYPE (oprnd0);
1984 oprnd1 = gimple_assign_rhs2 (last_stmt);
1985 if (TREE_CODE (oprnd0) != SSA_NAME
1986 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1987 || !INTEGRAL_TYPE_P (type)
1988 || !TYPE_UNSIGNED (type))
1989 return NULL;
1991 stmt_vec_info def_stmt_info;
1992 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
1993 return NULL;
1995 if (dt != vect_internal_def
1996 && dt != vect_constant_def
1997 && dt != vect_external_def)
1998 return NULL;
2000 vectype = get_vectype_for_scalar_type (type);
2001 if (vectype == NULL_TREE)
2002 return NULL;
2004 /* If vector/vector or vector/scalar rotate is supported by the target,
2005 don't do anything here. */
2006 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2007 if (optab1
2008 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2009 return NULL;
2011 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2013 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2014 if (optab2
2015 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2016 return NULL;
2019 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2020 don't do anything here either. */
2021 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2022 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2023 if (!optab1
2024 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2025 || !optab2
2026 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2028 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2029 return NULL;
2030 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2031 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2032 if (!optab1
2033 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2034 || !optab2
2035 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2036 return NULL;
2039 *type_out = vectype;
2041 if (dt == vect_external_def
2042 && TREE_CODE (oprnd1) == SSA_NAME)
2043 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2045 def = NULL_TREE;
2046 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2047 if (TREE_CODE (oprnd1) == INTEGER_CST
2048 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2049 def = oprnd1;
2050 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2052 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2053 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2054 && TYPE_PRECISION (TREE_TYPE (rhs1))
2055 == TYPE_PRECISION (type))
2056 def = rhs1;
2059 if (def == NULL_TREE)
2061 def = vect_recog_temp_ssa_var (type, NULL);
2062 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2063 if (ext_def)
2065 basic_block new_bb
2066 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2067 gcc_assert (!new_bb);
2069 else
2070 append_pattern_def_seq (stmt_vinfo, def_stmt);
2072 stype = TREE_TYPE (def);
2073 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2075 if (TREE_CODE (def) == INTEGER_CST)
2077 if (!tree_fits_uhwi_p (def)
2078 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2079 || integer_zerop (def))
2080 return NULL;
2081 def2 = build_int_cst (stype,
2082 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2084 else
2086 tree vecstype = get_vectype_for_scalar_type (stype);
2088 if (vecstype == NULL_TREE)
2089 return NULL;
2090 def2 = vect_recog_temp_ssa_var (stype, NULL);
2091 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2092 if (ext_def)
2094 basic_block new_bb
2095 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2096 gcc_assert (!new_bb);
2098 else
2099 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2101 def2 = vect_recog_temp_ssa_var (stype, NULL);
2102 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2103 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2104 gimple_assign_lhs (def_stmt), mask);
2105 if (ext_def)
2107 basic_block new_bb
2108 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2109 gcc_assert (!new_bb);
2111 else
2112 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2115 var1 = vect_recog_temp_ssa_var (type, NULL);
2116 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2117 ? LSHIFT_EXPR : RSHIFT_EXPR,
2118 oprnd0, def);
2119 append_pattern_def_seq (stmt_vinfo, def_stmt);
2121 var2 = vect_recog_temp_ssa_var (type, NULL);
2122 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2123 ? RSHIFT_EXPR : LSHIFT_EXPR,
2124 oprnd0, def2);
2125 append_pattern_def_seq (stmt_vinfo, def_stmt);
2127 /* Pattern detected. */
2128 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2130 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2131 var = vect_recog_temp_ssa_var (type, NULL);
2132 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2134 return pattern_stmt;
2137 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2138 vectorized:
2140 type a_t;
2141 TYPE b_T, res_T;
2143 S1 a_t = ;
2144 S2 b_T = ;
2145 S3 res_T = b_T op a_t;
2147 where type 'TYPE' is a type with different size than 'type',
2148 and op is <<, >> or rotate.
2150 Also detect cases:
2152 type a_t;
2153 TYPE b_T, c_T, res_T;
2155 S0 c_T = ;
2156 S1 a_t = (type) c_T;
2157 S2 b_T = ;
2158 S3 res_T = b_T op a_t;
2160 Input/Output:
2162 * STMT_VINFO: The stmt from which the pattern search begins,
2163 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2164 with a shift/rotate which has the same type on both operands: in the
2165 second case just b_T op c_T, in the first case with an added cast
2166 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2168 Output:
2170 * TYPE_OUT: The type of the output of this pattern.
2172 * Return value: A new stmt that will be used to replace the shift/rotate
2173 S3 stmt. */
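/* For illustration, a sketch of a loop in the first form above, where
   the shift amount has a narrower type than the shifted value (names
   are placeholders):

     void
     shift_by_var (long long *restrict r, long long *restrict a,
		   int *restrict amount, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = a[i] << amount[i];   // 64-bit value, 32-bit amount
     }

   The int shift amounts are cast to the 64-bit type in the pattern
   def sequence so that a vector-by-vector shift can be used.  */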
2175 static gimple *
2176 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2177 tree *type_out)
2179 gimple *last_stmt = stmt_vinfo->stmt;
2180 tree oprnd0, oprnd1, lhs, var;
2181 gimple *pattern_stmt;
2182 enum tree_code rhs_code;
2183 vec_info *vinfo = stmt_vinfo->vinfo;
2185 if (!is_gimple_assign (last_stmt))
2186 return NULL;
2188 rhs_code = gimple_assign_rhs_code (last_stmt);
2189 switch (rhs_code)
2191 case LSHIFT_EXPR:
2192 case RSHIFT_EXPR:
2193 case LROTATE_EXPR:
2194 case RROTATE_EXPR:
2195 break;
2196 default:
2197 return NULL;
2200 lhs = gimple_assign_lhs (last_stmt);
2201 oprnd0 = gimple_assign_rhs1 (last_stmt);
2202 oprnd1 = gimple_assign_rhs2 (last_stmt);
2203 if (TREE_CODE (oprnd0) != SSA_NAME
2204 || TREE_CODE (oprnd1) != SSA_NAME
2205 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2206 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2207 || TYPE_PRECISION (TREE_TYPE (lhs))
2208 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2209 return NULL;
2211 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2212 if (!def_vinfo)
2213 return NULL;
2215 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2216 if (*type_out == NULL_TREE)
2217 return NULL;
2219 tree def = NULL_TREE;
2220 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2221 if (def_stmt && gimple_assign_cast_p (def_stmt))
2223 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2224 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2225 && TYPE_PRECISION (TREE_TYPE (rhs1))
2226 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2228 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2229 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2230 def = rhs1;
2231 else
2233 tree mask
2234 = build_low_bits_mask (TREE_TYPE (rhs1),
2235 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2236 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2237 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2238 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2239 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2244 if (def == NULL_TREE)
2246 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2247 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2248 append_pattern_def_seq (stmt_vinfo, def_stmt);
2251 /* Pattern detected. */
2252 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2254 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2255 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2256 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2258 return pattern_stmt;
2261 /* Return true iff the target has a vector optab implementing the operation
2262 CODE on type VECTYPE. */
2264 static bool
2265 target_has_vecop_for_code (tree_code code, tree vectype)
2267 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2268 return voptab
2269 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2272 /* Verify that the target has optabs of VECTYPE to perform all the steps
2273 needed by the multiplication-by-immediate synthesis algorithm described by
2274 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2275 present. Return true iff the target supports all the steps. */
2277 static bool
2278 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2279 tree vectype, bool synth_shift_p)
2281 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2282 return false;
2284 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2285 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2287 if (var == negate_variant
2288 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2289 return false;
2291 /* If we must synthesize shifts with additions make sure that vector
2292 addition is available. */
2293 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2294 return false;
2296 for (int i = 1; i < alg->ops; i++)
2298 switch (alg->op[i])
2300 case alg_shift:
2301 break;
2302 case alg_add_t_m2:
2303 case alg_add_t2_m:
2304 case alg_add_factor:
2305 if (!supports_vplus)
2306 return false;
2307 break;
2308 case alg_sub_t_m2:
2309 case alg_sub_t2_m:
2310 case alg_sub_factor:
2311 if (!supports_vminus)
2312 return false;
2313 break;
2314 case alg_unknown:
2315 case alg_m:
2316 case alg_zero:
2317 case alg_impossible:
2318 return false;
2319 default:
2320 gcc_unreachable ();
2324 return true;
2327 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2328 putting the final result in DEST. Append all statements but the last into
2329 VINFO. Return the last statement. */
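/* For illustration, a shift by 3 is synthesized as three doublings
   (a sketch of the emitted statements):

     t1 = op + op;	// op << 1
     t2 = t1 + t1;	// op << 2
     dest = t2 + t2;	// op << 3  */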
2331 static gimple *
2332 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2333 stmt_vec_info vinfo)
2335 HOST_WIDE_INT i;
2336 tree itype = TREE_TYPE (op);
2337 tree prev_res = op;
2338 gcc_assert (amnt >= 0);
2339 for (i = 0; i < amnt; i++)
2341 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2342 : dest;
2343 gimple *stmt
2344 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2345 prev_res = tmp_var;
2346 if (i < amnt - 1)
2347 append_pattern_def_seq (vinfo, stmt);
2348 else
2349 return stmt;
2351 gcc_unreachable ();
2352 return NULL;
2355 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2356 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2357 the process if necessary. Append the resulting assignment statements
2358 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2359 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2360 left shifts using additions. */
2362 static tree
2363 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2364 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2366 if (integer_zerop (op2)
2367 && (code == LSHIFT_EXPR
2368 || code == PLUS_EXPR))
2370 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2371 return op1;
2374 gimple *stmt;
2375 tree itype = TREE_TYPE (op1);
2376 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2378 if (code == LSHIFT_EXPR
2379 && synth_shift_p)
2381 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2382 stmt_vinfo);
2383 append_pattern_def_seq (stmt_vinfo, stmt);
2384 return tmp_var;
2387 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2388 append_pattern_def_seq (stmt_vinfo, stmt);
2389 return tmp_var;
2392 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2393 and simple arithmetic operations to be vectorized. Record the statements
2394 produced in STMT_VINFO and return the last statement in the sequence or
2395 NULL if it's not possible to synthesize such a multiplication.
2396 This function mirrors the behavior of expand_mult_const in expmed.c but
2397 works on tree-ssa form. */
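/* For illustration, a multiplication by 10 might be synthesized as
   (a sketch; the exact sequence is whatever choose_mult_variant picks
   for the target's cost model):

     t1 = x << 2;	// x * 4
     t2 = t1 + x;	// x * 5
     res = t2 << 1;	// x * 10

   with each shift itself lowered to repeated additions when
   SYNTH_SHIFT_P is true.  */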
2399 static gimple *
2400 vect_synth_mult_by_constant (tree op, tree val,
2401 stmt_vec_info stmt_vinfo)
2403 tree itype = TREE_TYPE (op);
2404 machine_mode mode = TYPE_MODE (itype);
2405 struct algorithm alg;
2406 mult_variant variant;
2407 if (!tree_fits_shwi_p (val))
2408 return NULL;
2410 /* Multiplication synthesis by shifts, adds and subs can introduce
2411 signed overflow where the original operation didn't. Perform the
2412 operations on an unsigned type and cast back to avoid this.
2413 In the future we may want to relax this for synthesis algorithms
2414 that we can prove do not cause unexpected overflow. */
2415 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2417 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2419 /* Targets that don't support vector shifts but support vector additions
2420 can synthesize shifts that way. */
2421 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2423 HOST_WIDE_INT hwval = tree_to_shwi (val);
2424 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2425 The vectorizer's benefit analysis will decide whether it's beneficial
2426 to do this. */
2427 bool possible = choose_mult_variant (mode, hwval, &alg,
2428 &variant, MAX_COST);
2429 if (!possible)
2430 return NULL;
2432 tree vectype = get_vectype_for_scalar_type (multtype);
2434 if (!vectype
2435 || !target_supports_mult_synth_alg (&alg, variant,
2436 vectype, synth_shift_p))
2437 return NULL;
2439 tree accumulator;
2441 /* Clear out the sequence of statements so we can populate it below. */
2442 gimple *stmt = NULL;
2444 if (cast_to_unsigned_p)
2446 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2447 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2448 append_pattern_def_seq (stmt_vinfo, stmt);
2449 op = tmp_op;
2452 if (alg.op[0] == alg_zero)
2453 accumulator = build_int_cst (multtype, 0);
2454 else
2455 accumulator = op;
2457 bool needs_fixup = (variant == negate_variant)
2458 || (variant == add_variant);
2460 for (int i = 1; i < alg.ops; i++)
2462 tree shft_log = build_int_cst (multtype, alg.log[i]);
2463 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2464 tree tmp_var = NULL_TREE;
2466 switch (alg.op[i])
2468 case alg_shift:
2469 if (synth_shift_p)
2470 stmt
2471 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2472 stmt_vinfo);
2473 else
2474 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2475 shft_log);
2476 break;
2477 case alg_add_t_m2:
2478 tmp_var
2479 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2480 stmt_vinfo, synth_shift_p);
2481 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2482 tmp_var);
2483 break;
2484 case alg_sub_t_m2:
2485 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2486 shft_log, stmt_vinfo,
2487 synth_shift_p);
2488 /* In some algorithms the first step involves zeroing the
2489 accumulator. If subtracting from such an accumulator
2490 just emit the negation directly. */
2491 if (integer_zerop (accumulator))
2492 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2493 else
2494 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2495 tmp_var);
2496 break;
2497 case alg_add_t2_m:
2498 tmp_var
2499 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2500 stmt_vinfo, synth_shift_p);
2501 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2502 break;
2503 case alg_sub_t2_m:
2504 tmp_var
2505 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2506 stmt_vinfo, synth_shift_p);
2507 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2508 break;
2509 case alg_add_factor:
2510 tmp_var
2511 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2512 stmt_vinfo, synth_shift_p);
2513 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2514 tmp_var);
2515 break;
2516 case alg_sub_factor:
2517 tmp_var
2518 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2519 stmt_vinfo, synth_shift_p);
2520 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2521 accumulator);
2522 break;
2523 default:
2524 gcc_unreachable ();
2526 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2527 but rather return it directly. */
2529 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2530 append_pattern_def_seq (stmt_vinfo, stmt);
2531 accumulator = accum_tmp;
2533 if (variant == negate_variant)
2535 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2536 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2537 accumulator = accum_tmp;
2538 if (cast_to_unsigned_p)
2539 append_pattern_def_seq (stmt_vinfo, stmt);
2541 else if (variant == add_variant)
2543 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2544 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2545 accumulator = accum_tmp;
2546 if (cast_to_unsigned_p)
2547 append_pattern_def_seq (stmt_vinfo, stmt);
2549 /* Move back to a signed if needed. */
2550 if (cast_to_unsigned_p)
2552 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2553 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2556 return stmt;
2559 /* Detect multiplication by constant and convert it into a sequence of
2560 shifts, additions, subtractions and negations. We reuse the
2561 choose_mult_variant algorithms from expmed.c.
2563 Input/Output:
2565 STMT_VINFO: The stmt from which the pattern search begins,
2566 i.e. the mult stmt.
2568 Output:
2570 * TYPE_OUT: The type of the output of this pattern.
2572 * Return value: A new stmt that will be used to replace
2573 the multiplication. */
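/* For illustration, a sketch of a loop this pattern rewrites when the
   target lacks a vector multiply for the element type (names are
   placeholders):

     void
     scale_by_9 (int *restrict r, int *restrict a, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = a[i] * 9;	// becomes (a[i] << 3) + a[i]
     }  */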
2575 static gimple *
2576 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2578 gimple *last_stmt = stmt_vinfo->stmt;
2579 tree oprnd0, oprnd1, vectype, itype;
2580 gimple *pattern_stmt;
2582 if (!is_gimple_assign (last_stmt))
2583 return NULL;
2585 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2586 return NULL;
2588 oprnd0 = gimple_assign_rhs1 (last_stmt);
2589 oprnd1 = gimple_assign_rhs2 (last_stmt);
2590 itype = TREE_TYPE (oprnd0);
2592 if (TREE_CODE (oprnd0) != SSA_NAME
2593 || TREE_CODE (oprnd1) != INTEGER_CST
2594 || !INTEGRAL_TYPE_P (itype)
2595 || !type_has_mode_precision_p (itype))
2596 return NULL;
2598 vectype = get_vectype_for_scalar_type (itype);
2599 if (vectype == NULL_TREE)
2600 return NULL;
2602 /* If the target can handle vectorized multiplication natively,
2603 don't attempt to optimize this. */
2604 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2605 if (mul_optab != unknown_optab)
2607 machine_mode vec_mode = TYPE_MODE (vectype);
2608 int icode = (int) optab_handler (mul_optab, vec_mode);
2609 if (icode != CODE_FOR_nothing)
2610 return NULL;
2613 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2614 if (!pattern_stmt)
2615 return NULL;
2617 /* Pattern detected. */
2618 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2620 *type_out = vectype;
2622 return pattern_stmt;
2625 /* Detect a signed division by a constant that wouldn't be
2626 otherwise vectorized:
2628 type a_t, b_t;
2630 S1 a_t = b_t / N;
2632 where type 'type' is an integral type and N is a constant.
2634 Similarly handle modulo by a constant:
2636 S4 a_t = b_t % N;
2638 Input/Output:
2640 * STMT_VINFO: The stmt from which the pattern search begins,
2641 i.e. the division stmt. S1 is replaced, if N is a power
2642 of two constant and type is signed, by:
2643 S3 y_t = b_t < 0 ? N - 1 : 0;
2644 S2 x_t = b_t + y_t;
2645 S1' a_t = x_t >> log2 (N);
2647 S4 is replaced, if N is a power of two constant and
2648 type is signed, by (where *_T temporaries have unsigned type):
2649 S9 y_T = b_t < 0 ? -1U : 0U;
2650 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2651 S7 z_t = (type) z_T;
2652 S6 w_t = b_t + z_t;
2653 S5 x_t = w_t & (N - 1);
2654 S4' a_t = x_t - z_t;
2656 Output:
2658 * TYPE_OUT: The type of the output of this pattern.
2660 * Return value: A new stmt that will be used to replace the division
2661 S1 or modulo S4 stmt. */
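/* For illustration, two sketches of loops handled here when the target
   has no vector division (names are placeholders):

     void
     div_by_8 (int *restrict r, int *restrict a, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = a[i] / 8;	// power of two: S3/S2/S1' above
     }

     void
     mod_by_7 (int *restrict r, int *restrict a, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = a[i] % 7;	// general constant: highpart multiply
     }  */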
2663 static gimple *
2664 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2666 gimple *last_stmt = stmt_vinfo->stmt;
2667 tree oprnd0, oprnd1, vectype, itype, cond;
2668 gimple *pattern_stmt, *def_stmt;
2669 enum tree_code rhs_code;
2670 optab optab;
2671 tree q;
2672 int dummy_int, prec;
2674 if (!is_gimple_assign (last_stmt))
2675 return NULL;
2677 rhs_code = gimple_assign_rhs_code (last_stmt);
2678 switch (rhs_code)
2680 case TRUNC_DIV_EXPR:
2681 case EXACT_DIV_EXPR:
2682 case TRUNC_MOD_EXPR:
2683 break;
2684 default:
2685 return NULL;
2688 oprnd0 = gimple_assign_rhs1 (last_stmt);
2689 oprnd1 = gimple_assign_rhs2 (last_stmt);
2690 itype = TREE_TYPE (oprnd0);
2691 if (TREE_CODE (oprnd0) != SSA_NAME
2692 || TREE_CODE (oprnd1) != INTEGER_CST
2693 || TREE_CODE (itype) != INTEGER_TYPE
2694 || !type_has_mode_precision_p (itype))
2695 return NULL;
2697 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2698 vectype = get_vectype_for_scalar_type (itype);
2699 if (vectype == NULL_TREE)
2700 return NULL;
2702 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2704 /* If the target can handle vectorized division or modulo natively,
2705 don't attempt to optimize this, since native division is likely
2706 to give smaller code. */
2707 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2708 if (optab != unknown_optab)
2710 machine_mode vec_mode = TYPE_MODE (vectype);
2711 int icode = (int) optab_handler (optab, vec_mode);
2712 if (icode != CODE_FOR_nothing)
2713 return NULL;
2717 prec = TYPE_PRECISION (itype);
2718 if (integer_pow2p (oprnd1))
2720 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2721 return NULL;
2723 /* Pattern detected. */
2724 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2726 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2727 build_int_cst (itype, 0));
2728 if (rhs_code == TRUNC_DIV_EXPR
2729 || rhs_code == EXACT_DIV_EXPR)
2731 tree var = vect_recog_temp_ssa_var (itype, NULL);
2732 tree shift;
2733 def_stmt
2734 = gimple_build_assign (var, COND_EXPR, cond,
2735 fold_build2 (MINUS_EXPR, itype, oprnd1,
2736 build_int_cst (itype, 1)),
2737 build_int_cst (itype, 0));
2738 append_pattern_def_seq (stmt_vinfo, def_stmt);
2739 var = vect_recog_temp_ssa_var (itype, NULL);
2740 def_stmt
2741 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2742 gimple_assign_lhs (def_stmt));
2743 append_pattern_def_seq (stmt_vinfo, def_stmt);
2745 shift = build_int_cst (itype, tree_log2 (oprnd1));
2746 pattern_stmt
2747 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2748 RSHIFT_EXPR, var, shift);
2750 else
2752 tree signmask;
2753 if (compare_tree_int (oprnd1, 2) == 0)
2755 signmask = vect_recog_temp_ssa_var (itype, NULL);
2756 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2757 build_int_cst (itype, 1),
2758 build_int_cst (itype, 0));
2759 append_pattern_def_seq (stmt_vinfo, def_stmt);
2761 else
2763 tree utype
2764 = build_nonstandard_integer_type (prec, 1);
2765 tree vecutype = get_vectype_for_scalar_type (utype);
2766 tree shift
2767 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2768 - tree_log2 (oprnd1));
2769 tree var = vect_recog_temp_ssa_var (utype, NULL);
2771 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2772 build_int_cst (utype, -1),
2773 build_int_cst (utype, 0));
2774 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2775 var = vect_recog_temp_ssa_var (utype, NULL);
2776 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2777 gimple_assign_lhs (def_stmt),
2778 shift);
2779 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2780 signmask = vect_recog_temp_ssa_var (itype, NULL);
2781 def_stmt
2782 = gimple_build_assign (signmask, NOP_EXPR, var);
2783 append_pattern_def_seq (stmt_vinfo, def_stmt);
2785 def_stmt
2786 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2787 PLUS_EXPR, oprnd0, signmask);
2788 append_pattern_def_seq (stmt_vinfo, def_stmt);
2789 def_stmt
2790 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2791 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2792 fold_build2 (MINUS_EXPR, itype, oprnd1,
2793 build_int_cst (itype, 1)));
2794 append_pattern_def_seq (stmt_vinfo, def_stmt);
2796 pattern_stmt
2797 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2798 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2799 signmask);
2802 *type_out = vectype;
2803 return pattern_stmt;
2806 if (prec > HOST_BITS_PER_WIDE_INT
2807 || integer_zerop (oprnd1))
2808 return NULL;
2810 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2811 return NULL;
2813 if (TYPE_UNSIGNED (itype))
2815 unsigned HOST_WIDE_INT mh, ml;
2816 int pre_shift, post_shift;
2817 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2818 & GET_MODE_MASK (itype_mode));
2819 tree t1, t2, t3, t4;
2821 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2822 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2823 return NULL;
2825 /* Find a suitable multiplier and right shift count
2826 instead of multiplying with D. */
2827 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2829 /* If the suggested multiplier is more than SIZE bits, we can do better
2830 for even divisors, using an initial right shift. */
2831 if (mh != 0 && (d & 1) == 0)
2833 pre_shift = ctz_or_zero (d);
2834 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2835 &ml, &post_shift, &dummy_int);
2836 gcc_assert (!mh);
2838 else
2839 pre_shift = 0;
2841 if (mh != 0)
2843 if (post_shift - 1 >= prec)
2844 return NULL;
2846 /* t1 = oprnd0 h* ml;
2847 t2 = oprnd0 - t1;
2848 t3 = t2 >> 1;
2849 t4 = t1 + t3;
2850 q = t4 >> (post_shift - 1); */
2851 t1 = vect_recog_temp_ssa_var (itype, NULL);
2852 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2853 build_int_cst (itype, ml));
2854 append_pattern_def_seq (stmt_vinfo, def_stmt);
2856 t2 = vect_recog_temp_ssa_var (itype, NULL);
2857 def_stmt
2858 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2859 append_pattern_def_seq (stmt_vinfo, def_stmt);
2861 t3 = vect_recog_temp_ssa_var (itype, NULL);
2862 def_stmt
2863 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2864 append_pattern_def_seq (stmt_vinfo, def_stmt);
2866 t4 = vect_recog_temp_ssa_var (itype, NULL);
2867 def_stmt
2868 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2870 if (post_shift != 1)
2872 append_pattern_def_seq (stmt_vinfo, def_stmt);
2874 q = vect_recog_temp_ssa_var (itype, NULL);
2875 pattern_stmt
2876 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2877 build_int_cst (itype, post_shift - 1));
2879 else
2881 q = t4;
2882 pattern_stmt = def_stmt;
2885 else
2887 if (pre_shift >= prec || post_shift >= prec)
2888 return NULL;
2890 /* t1 = oprnd0 >> pre_shift;
2891 t2 = t1 h* ml;
2892 q = t2 >> post_shift; */
2893 if (pre_shift)
2895 t1 = vect_recog_temp_ssa_var (itype, NULL);
2896 def_stmt
2897 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2898 build_int_cst (NULL, pre_shift));
2899 append_pattern_def_seq (stmt_vinfo, def_stmt);
2901 else
2902 t1 = oprnd0;
2904 t2 = vect_recog_temp_ssa_var (itype, NULL);
2905 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2906 build_int_cst (itype, ml));
2908 if (post_shift)
2910 append_pattern_def_seq (stmt_vinfo, def_stmt);
2912 q = vect_recog_temp_ssa_var (itype, NULL);
2913 def_stmt
2914 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2915 build_int_cst (itype, post_shift));
2917 else
2918 q = t2;
2920 pattern_stmt = def_stmt;
2923 else
2925 unsigned HOST_WIDE_INT ml;
2926 int post_shift;
2927 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2928 unsigned HOST_WIDE_INT abs_d;
2929 bool add = false;
2930 tree t1, t2, t3, t4;
2932 /* Give up for -1. */
2933 if (d == -1)
2934 return NULL;
2936 /* Since d might be INT_MIN, we have to cast to
2937 unsigned HOST_WIDE_INT before negating to avoid
2938 undefined signed overflow. */
2939 abs_d = (d >= 0
2940 ? (unsigned HOST_WIDE_INT) d
2941 : - (unsigned HOST_WIDE_INT) d);
2943 /* n rem d = n rem -d */
2944 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2946 d = abs_d;
2947 oprnd1 = build_int_cst (itype, abs_d);
2949 else if (HOST_BITS_PER_WIDE_INT >= prec
2950 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2951 /* This case is not handled correctly below. */
2952 return NULL;
2954 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2955 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2957 add = true;
2958 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2960 if (post_shift >= prec)
2961 return NULL;
2963 /* t1 = oprnd0 h* ml; */
2964 t1 = vect_recog_temp_ssa_var (itype, NULL);
2965 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2966 build_int_cst (itype, ml));
2968 if (add)
2970 /* t2 = t1 + oprnd0; */
2971 append_pattern_def_seq (stmt_vinfo, def_stmt);
2972 t2 = vect_recog_temp_ssa_var (itype, NULL);
2973 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2975 else
2976 t2 = t1;
2978 if (post_shift)
2980 /* t3 = t2 >> post_shift; */
2981 append_pattern_def_seq (stmt_vinfo, def_stmt);
2982 t3 = vect_recog_temp_ssa_var (itype, NULL);
2983 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2984 build_int_cst (itype, post_shift));
2986 else
2987 t3 = t2;
2989 wide_int oprnd0_min, oprnd0_max;
2990 int msb = 1;
2991 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2993 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2994 msb = 0;
2995 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2996 msb = -1;
2999 if (msb == 0 && d >= 0)
3001 /* q = t3; */
3002 q = t3;
3003 pattern_stmt = def_stmt;
3005 else
3007 /* t4 = oprnd0 >> (prec - 1);
3008 or if we know from VRP that oprnd0 >= 0
3009 t4 = 0;
3010 or if we know from VRP that oprnd0 < 0
3011 t4 = -1; */
3012 append_pattern_def_seq (stmt_vinfo, def_stmt);
3013 t4 = vect_recog_temp_ssa_var (itype, NULL);
3014 if (msb != 1)
3015 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3016 build_int_cst (itype, msb));
3017 else
3018 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3019 build_int_cst (itype, prec - 1));
3020 append_pattern_def_seq (stmt_vinfo, def_stmt);
3022 /* q = t3 - t4; or q = t4 - t3; */
3023 q = vect_recog_temp_ssa_var (itype, NULL);
3024 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3025 d < 0 ? t3 : t4);
3029 if (rhs_code == TRUNC_MOD_EXPR)
3031 tree r, t1;
3033 /* We divided. Now finish by:
3034 t1 = q * oprnd1;
3035 r = oprnd0 - t1; */
3036 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3038 t1 = vect_recog_temp_ssa_var (itype, NULL);
3039 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3040 append_pattern_def_seq (stmt_vinfo, def_stmt);
3042 r = vect_recog_temp_ssa_var (itype, NULL);
3043 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3046 /* Pattern detected. */
3047 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3049 *type_out = vectype;
3050 return pattern_stmt;
3053 /* Function vect_recog_mixed_size_cond_pattern
3055 Try to find the following pattern:
3057 type x_t, y_t;
3058 TYPE a_T, b_T, c_T;
3059 loop:
3060 S1 a_T = x_t CMP y_t ? b_T : c_T;
3062 where type 'TYPE' is an integral type which has different size
3063 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3064 than 'type', the constants need to fit into an integer type
3065 with the same width as 'type') or results of conversion from 'type'.
3067 Input:
3069 * STMT_VINFO: The stmt from which the pattern search begins.
3071 Output:
3073 * TYPE_OUT: The type of the output of this pattern.
3075 * Return value: A new stmt that will be used to replace the pattern.
3076 Additionally a def_stmt is added.
3078 a_it = x_t CMP y_t ? b_it : c_it;
3079 a_T = (TYPE) a_it; */
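/* For illustration, with 'type' = int and 'TYPE' = short the matched
   statement and the emitted pattern look like (a sketch):

     short_res = x_int CMP y_int ? 1 : 2;	// S1, mixed sizes

   becomes

     a_it = x_int CMP y_int ? 1 : 2;	// COND_EXPR in a 32-bit type
     short_res = (short) a_it;  */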
3081 static gimple *
3082 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3084 gimple *last_stmt = stmt_vinfo->stmt;
3085 tree cond_expr, then_clause, else_clause;
3086 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3087 gimple *pattern_stmt, *def_stmt;
3088 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3089 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3090 bool promotion;
3091 tree comp_scalar_type;
3093 if (!is_gimple_assign (last_stmt)
3094 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3095 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3096 return NULL;
3098 cond_expr = gimple_assign_rhs1 (last_stmt);
3099 then_clause = gimple_assign_rhs2 (last_stmt);
3100 else_clause = gimple_assign_rhs3 (last_stmt);
3102 if (!COMPARISON_CLASS_P (cond_expr))
3103 return NULL;
3105 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3106 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3107 if (comp_vectype == NULL_TREE)
3108 return NULL;
3110 type = gimple_expr_type (last_stmt);
3111 if (types_compatible_p (type, comp_scalar_type)
3112 || ((TREE_CODE (then_clause) != INTEGER_CST
3113 || TREE_CODE (else_clause) != INTEGER_CST)
3114 && !INTEGRAL_TYPE_P (comp_scalar_type))
3115 || !INTEGRAL_TYPE_P (type))
3116 return NULL;
3118 if ((TREE_CODE (then_clause) != INTEGER_CST
3119 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3120 &def_stmt0, &promotion))
3121 || (TREE_CODE (else_clause) != INTEGER_CST
3122 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3123 &def_stmt1, &promotion)))
3124 return NULL;
3126 if (orig_type0 && orig_type1
3127 && !types_compatible_p (orig_type0, orig_type1))
3128 return NULL;
3130 if (orig_type0)
3132 if (!types_compatible_p (orig_type0, comp_scalar_type))
3133 return NULL;
3134 then_clause = gimple_assign_rhs1 (def_stmt0);
3135 itype = orig_type0;
3138 if (orig_type1)
3140 if (!types_compatible_p (orig_type1, comp_scalar_type))
3141 return NULL;
3142 else_clause = gimple_assign_rhs1 (def_stmt1);
3143 itype = orig_type1;
3147 HOST_WIDE_INT cmp_mode_size
3148 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3150 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3151 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3152 return NULL;
3154 vectype = get_vectype_for_scalar_type (type);
3155 if (vectype == NULL_TREE)
3156 return NULL;
3158 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3159 return NULL;
3161 if (itype == NULL_TREE)
3162 itype = build_nonstandard_integer_type (cmp_mode_size,
3163 TYPE_UNSIGNED (type));
3165 if (itype == NULL_TREE
3166 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3167 return NULL;
3169 vecitype = get_vectype_for_scalar_type (itype);
3170 if (vecitype == NULL_TREE)
3171 return NULL;
3173 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3174 return NULL;
3176 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3178 if ((TREE_CODE (then_clause) == INTEGER_CST
3179 && !int_fits_type_p (then_clause, itype))
3180 || (TREE_CODE (else_clause) == INTEGER_CST
3181 && !int_fits_type_p (else_clause, itype)))
3182 return NULL;
3185 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3186 COND_EXPR, unshare_expr (cond_expr),
3187 fold_convert (itype, then_clause),
3188 fold_convert (itype, else_clause));
3189 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3190 NOP_EXPR, gimple_assign_lhs (def_stmt));
3192 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3193 *type_out = vectype;
3195 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3197 return pattern_stmt;
3201 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3202 true if bool VAR can and should be optimized that way. Assume it shouldn't
3203 in case it's a result of a comparison which can be directly vectorized into
3204 a vector comparison. Fills in STMTS with all stmts visited during the
3205 walk. */
3207 static bool
3208 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3210 tree rhs1;
3211 enum tree_code rhs_code;
3213 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3214 if (!def_stmt_info)
3215 return false;
3217 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3218 if (!def_stmt)
3219 return false;
3221 if (stmts.contains (def_stmt))
3222 return true;
3224 rhs1 = gimple_assign_rhs1 (def_stmt);
3225 rhs_code = gimple_assign_rhs_code (def_stmt);
3226 switch (rhs_code)
3228 case SSA_NAME:
3229 if (! check_bool_pattern (rhs1, vinfo, stmts))
3230 return false;
3231 break;
3233 CASE_CONVERT:
3234 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3235 return false;
3236 if (! check_bool_pattern (rhs1, vinfo, stmts))
3237 return false;
3238 break;
3240 case BIT_NOT_EXPR:
3241 if (! check_bool_pattern (rhs1, vinfo, stmts))
3242 return false;
3243 break;
3245 case BIT_AND_EXPR:
3246 case BIT_IOR_EXPR:
3247 case BIT_XOR_EXPR:
3248 if (! check_bool_pattern (rhs1, vinfo, stmts)
3249 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3250 return false;
3251 break;
3253 default:
3254 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3256 tree vecitype, comp_vectype;
3258 /* If the comparison can throw, then is_gimple_condexpr will be
3259 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3260 if (stmt_could_throw_p (def_stmt))
3261 return false;
3263 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3264 if (comp_vectype == NULL_TREE)
3265 return false;
3267 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3268 if (mask_type
3269 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3270 return false;
3272 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3274 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3275 tree itype
3276 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3277 vecitype = get_vectype_for_scalar_type (itype);
3278 if (vecitype == NULL_TREE)
3279 return false;
3281 else
3282 vecitype = comp_vectype;
3283 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3284 return false;
3286 else
3287 return false;
3288 break;
3291 bool res = stmts.add (def_stmt);
3292 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3293 gcc_assert (!res);
3295 return true;
3299 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3300 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3301 pattern sequence. */
3303 static tree
3304 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3306 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3307 NOP_EXPR, var);
3308 append_pattern_def_seq (stmt_info, cast_stmt,
3309 get_vectype_for_scalar_type (type));
3310 return gimple_assign_lhs (cast_stmt);
3313 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3314 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3315 type, OUT_TYPE is the desired final integer type of the whole pattern.
3316 STMT_INFO is the info of the pattern root and is where pattern stmts should
3317 be associated with. DEFS is a map of pattern defs. */
3319 static void
3320 adjust_bool_pattern (tree var, tree out_type,
3321 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3323 gimple *stmt = SSA_NAME_DEF_STMT (var);
3324 enum tree_code rhs_code, def_rhs_code;
3325 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3326 location_t loc;
3327 gimple *pattern_stmt, *def_stmt;
3328 tree trueval = NULL_TREE;
3330 rhs1 = gimple_assign_rhs1 (stmt);
3331 rhs2 = gimple_assign_rhs2 (stmt);
3332 rhs_code = gimple_assign_rhs_code (stmt);
3333 loc = gimple_location (stmt);
3334 switch (rhs_code)
3336 case SSA_NAME:
3337 CASE_CONVERT:
3338 irhs1 = *defs.get (rhs1);
3339 itype = TREE_TYPE (irhs1);
3340 pattern_stmt
3341 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3342 SSA_NAME, irhs1);
3343 break;
3345 case BIT_NOT_EXPR:
3346 irhs1 = *defs.get (rhs1);
3347 itype = TREE_TYPE (irhs1);
3348 pattern_stmt
3349 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3350 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3351 break;
3353 case BIT_AND_EXPR:
3354 /* Try to optimize x = y & (a < b ? 1 : 0); into
3355 x = (a < b ? y : 0);
3357 E.g. for:
3358 bool a_b, b_b, c_b;
3359 TYPE d_T;
3361 S1 a_b = x1 CMP1 y1;
3362 S2 b_b = x2 CMP2 y2;
3363 S3 c_b = a_b & b_b;
3364 S4 d_T = (TYPE) c_b;
3366 we would normally emit:
3368 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3369 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3370 S3' c_T = a_T & b_T;
3371 S4' d_T = c_T;
3373 but we can save one stmt by using the
3374 result of one of the COND_EXPRs in the other COND_EXPR and leave
3375 BIT_AND_EXPR stmt out:
3377 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3378 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3379 S4' d_T = c_T;
3381 At least when VEC_COND_EXPR is implemented using masks,
3382 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3383 computes the comparison mask and ANDs it, in one case with an
3384 all-ones vector, in the other case with a vector register.
3385 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3386 often more expensive. */
3387 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3388 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3389 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3391 irhs1 = *defs.get (rhs1);
3392 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3393 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3394 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3396 rhs_code = def_rhs_code;
3397 rhs1 = def_rhs1;
3398 rhs2 = gimple_assign_rhs2 (def_stmt);
3399 trueval = irhs1;
3400 goto do_compare;
3402 else
3403 irhs2 = *defs.get (rhs2);
3404 goto and_ior_xor;
3406 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3407 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3408 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3410 irhs2 = *defs.get (rhs2);
3411 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3412 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3413 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3415 rhs_code = def_rhs_code;
3416 rhs1 = def_rhs1;
3417 rhs2 = gimple_assign_rhs2 (def_stmt);
3418 trueval = irhs2;
3419 goto do_compare;
3421 else
3422 irhs1 = *defs.get (rhs1);
3423 goto and_ior_xor;
3425 /* FALLTHRU */
3426 case BIT_IOR_EXPR:
3427 case BIT_XOR_EXPR:
3428 irhs1 = *defs.get (rhs1);
3429 irhs2 = *defs.get (rhs2);
3430 and_ior_xor:
3431 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3432 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3434 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3435 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3436 int out_prec = TYPE_PRECISION (out_type);
3437 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3438 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3439 stmt_info);
3440 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3441 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3442 stmt_info);
3443 else
3445 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3446 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3449 itype = TREE_TYPE (irhs1);
3450 pattern_stmt
3451 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3452 rhs_code, irhs1, irhs2);
3453 break;
3455 default:
3456 do_compare:
3457 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3458 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3459 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3460 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3461 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3463 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3464 itype
3465 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3467 else
3468 itype = TREE_TYPE (rhs1);
3469 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3470 if (trueval == NULL_TREE)
3471 trueval = build_int_cst (itype, 1);
3472 else
3473 gcc_checking_assert (useless_type_conversion_p (itype,
3474 TREE_TYPE (trueval)));
3475 pattern_stmt
3476 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3477 COND_EXPR, cond_expr, trueval,
3478 build_int_cst (itype, 0));
3479 break;
3482 gimple_set_location (pattern_stmt, loc);
3483 append_pattern_def_seq (stmt_info, pattern_stmt,
3484 get_vectype_for_scalar_type (itype));
3485 defs.put (var, gimple_assign_lhs (pattern_stmt));
3488 /* Comparison function for qsorting a vector of gimple stmts by UID. */
3490 static int
3491 sort_after_uid (const void *p1, const void *p2)
3493 const gimple *stmt1 = *(const gimple * const *)p1;
3494 const gimple *stmt2 = *(const gimple * const *)p2;
3495 return gimple_uid (stmt1) - gimple_uid (stmt2);
3498 /* Create pattern stmts for all stmts participating in the bool pattern
3499 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3500 OUT_TYPE. Return the def of the pattern root. */
3502 static tree
3503 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3504 tree out_type, stmt_vec_info stmt_info)
3506 /* Gather original stmts in the bool pattern in their order of appearance
3507 in the IL. */
3508 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3509 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3510 i != bool_stmt_set.end (); ++i)
3511 bool_stmts.quick_push (*i);
3512 bool_stmts.qsort (sort_after_uid);
3514 /* Now process them in that order, producing pattern stmts. */
3515 hash_map <tree, tree> defs;
3516 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3517 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3518 out_type, stmt_info, defs);
3520 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3521 gimple *pattern_stmt
3522 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3523 return gimple_assign_lhs (pattern_stmt);
3526 /* Helper for search_type_for_mask. */
3528 static tree
3529 search_type_for_mask_1 (tree var, vec_info *vinfo,
3530 hash_map<gimple *, tree> &cache)
3532 tree rhs1;
3533 enum tree_code rhs_code;
3534 tree res = NULL_TREE, res2;
3536 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3537 return NULL_TREE;
3539 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3540 if (!def_stmt_info)
3541 return NULL_TREE;
3543 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3544 if (!def_stmt)
3545 return NULL_TREE;
3547 tree *c = cache.get (def_stmt);
3548 if (c)
3549 return *c;
3551 rhs_code = gimple_assign_rhs_code (def_stmt);
3552 rhs1 = gimple_assign_rhs1 (def_stmt);
3554 switch (rhs_code)
3556 case SSA_NAME:
3557 case BIT_NOT_EXPR:
3558 CASE_CONVERT:
3559 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3560 break;
3562 case BIT_AND_EXPR:
3563 case BIT_IOR_EXPR:
3564 case BIT_XOR_EXPR:
3565 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3566 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3567 cache);
3568 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3569 res = res2;
3570 break;
3572 default:
3573 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3575 tree comp_vectype, mask_type;
3577 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3579 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3580 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3581 vinfo, cache);
3582 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3583 res = res2;
3584 break;
3587 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3588 if (comp_vectype == NULL_TREE)
3590 res = NULL_TREE;
3591 break;
3594 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3595 if (!mask_type
3596 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3598 res = NULL_TREE;
3599 break;
3602 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3603 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3605 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3606 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3608 else
3609 res = TREE_TYPE (rhs1);
3613 cache.put (def_stmt, res);
3614 return res;
3617 /* Return the proper type for converting bool VAR into
3618 an integer value or NULL_TREE if no such type exists.
3619 The type is chosen so that the converted value has the
3620 same number of elements as VAR's vector type. */
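/* For illustration only (a hypothetical example, not taken from the code
   below): if VAR is defined by a comparison of 32-bit ints, e.g.

     bool b = x == y;

   the function would typically return a 32-bit unsigned integer type, so
   that converting b keeps the same number of vector elements as the
   comparison itself.  */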
3622 static tree
3623 search_type_for_mask (tree var, vec_info *vinfo)
3625 hash_map<gimple *, tree> cache;
3626 return search_type_for_mask_1 (var, vinfo, cache);
3629 /* Function vect_recog_bool_pattern
3631 Try to find pattern like following:
3633 bool a_b, b_b, c_b, d_b, e_b;
3634 TYPE f_T;
3635 loop:
3636 S1 a_b = x1 CMP1 y1;
3637 S2 b_b = x2 CMP2 y2;
3638 S3 c_b = a_b & b_b;
3639 S4 d_b = x3 CMP3 y3;
3640 S5 e_b = c_b | d_b;
3641 S6 f_T = (TYPE) e_b;
3643 where type 'TYPE' is an integral type. Or a similar pattern
3644 ending in
3646 S6 f_Y = e_b ? r_Y : s_Y;
3648 as results from if-conversion of a complex condition.
3650 Input:
3652 * STMT_VINFO: The stmt at the end from which the pattern
3653 search begins, i.e. cast of a bool to
3654 an integer type.
3656 Output:
3658 * TYPE_OUT: The type of the output of this pattern.
3660 * Return value: A new stmt that will be used to replace the pattern.
3662 Assuming the size of TYPE is the same as the size of all comparisons
3663 (otherwise some casts would be added where needed), for the above
3664 sequence we create the following related pattern stmts:
3665 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3666 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3667 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3668 S5' e_T = c_T | d_T;
3669 S6' f_T = e_T;
3671 Instead of the above S3' we could emit:
3672 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3673 S3' c_T = a_T | b_T;
3674 but the above is more efficient. */
3676 static gimple *
3677 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3679 gimple *last_stmt = stmt_vinfo->stmt;
3680 enum tree_code rhs_code;
3681 tree var, lhs, rhs, vectype;
3682 vec_info *vinfo = stmt_vinfo->vinfo;
3683 gimple *pattern_stmt;
3685 if (!is_gimple_assign (last_stmt))
3686 return NULL;
3688 var = gimple_assign_rhs1 (last_stmt);
3689 lhs = gimple_assign_lhs (last_stmt);
3691 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3692 return NULL;
3694 hash_set<gimple *> bool_stmts;
3696 rhs_code = gimple_assign_rhs_code (last_stmt);
3697 if (CONVERT_EXPR_CODE_P (rhs_code))
3699 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3700 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3701 return NULL;
3702 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3703 if (vectype == NULL_TREE)
3704 return NULL;
3706 if (check_bool_pattern (var, vinfo, bool_stmts))
3708 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3709 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3710 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3711 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3712 else
3713 pattern_stmt
3714 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3716 else
3718 tree type = search_type_for_mask (var, vinfo);
3719 tree cst0, cst1, tmp;
3721 if (!type)
3722 return NULL;
3724 /* We may directly use the condition with a narrowed type, avoiding
3725 multiple cond exprs followed by result packing, and instead
3726 perform a single cond with a packed mask. In case of
3727 widening it is better to emit the cond first and then
3728 extract the results. */
3729 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3730 type = TREE_TYPE (lhs);
3732 cst0 = build_int_cst (type, 0);
3733 cst1 = build_int_cst (type, 1);
3734 tmp = vect_recog_temp_ssa_var (type, NULL);
3735 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3737 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3739 tree new_vectype = get_vectype_for_scalar_type (type);
3740 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3742 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3743 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3747 *type_out = vectype;
3748 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3750 return pattern_stmt;
3752 else if (rhs_code == COND_EXPR
3753 && TREE_CODE (var) == SSA_NAME)
3755 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3756 if (vectype == NULL_TREE)
3757 return NULL;
3759 /* Build a scalar type for the boolean result that when
3760 vectorized matches the vector type of the result in
3761 size and number of elements. */
3762 unsigned prec
3763 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3764 TYPE_VECTOR_SUBPARTS (vectype));
3766 tree type
3767 = build_nonstandard_integer_type (prec,
3768 TYPE_UNSIGNED (TREE_TYPE (var)));
3769 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3770 return NULL;
3772 if (!check_bool_pattern (var, vinfo, bool_stmts))
3773 return NULL;
3775 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3777 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3778 pattern_stmt
3779 = gimple_build_assign (lhs, COND_EXPR,
3780 build2 (NE_EXPR, boolean_type_node,
3781 rhs, build_int_cst (type, 0)),
3782 gimple_assign_rhs2 (last_stmt),
3783 gimple_assign_rhs3 (last_stmt));
3784 *type_out = vectype;
3785 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3787 return pattern_stmt;
3789 else if (rhs_code == SSA_NAME
3790 && STMT_VINFO_DATA_REF (stmt_vinfo))
3792 stmt_vec_info pattern_stmt_info;
3793 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3794 gcc_assert (vectype != NULL_TREE);
3795 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3796 return NULL;
3798 if (check_bool_pattern (var, vinfo, bool_stmts))
3799 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3800 else
3802 tree type = search_type_for_mask (var, vinfo);
3803 tree cst0, cst1, new_vectype;
3805 if (!type)
3806 return NULL;
3808 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3809 type = TREE_TYPE (vectype);
3811 cst0 = build_int_cst (type, 0);
3812 cst1 = build_int_cst (type, 1);
3813 new_vectype = get_vectype_for_scalar_type (type);
3815 rhs = vect_recog_temp_ssa_var (type, NULL);
3816 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3817 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3820 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3821 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3823 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3824 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3825 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3826 rhs = rhs2;
3828 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3829 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3830 STMT_VINFO_DATA_REF (pattern_stmt_info)
3831 = STMT_VINFO_DATA_REF (stmt_vinfo);
3832 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3833 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3834 *type_out = vectype;
3835 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3837 return pattern_stmt;
3839 else
3840 return NULL;
3844 /* A helper for vect_recog_mask_conversion_pattern. Build a
3845 conversion of MASK to a type suitable for masking VECTYPE.
3846 The built statement gets the required vectype and is appended
3847 to STMT_VINFO's pattern definition sequence.
3849 Return the converted mask. */
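/* A hypothetical illustration (mirroring the S3''/S4'' statements in the
   vect_recog_mask_conversion_pattern comment below): for a VECTYPE with
   32-bit integer elements, MASKTYPE is the same-sized boolean vector type
   and the emitted pattern statement is roughly

     mask' = (_Bool[bitsize=32]) mask;

   with MASKTYPE recorded as its vector type.  */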
3851 static tree
3852 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3854 gimple *stmt;
3855 tree masktype, tmp;
3857 masktype = build_same_sized_truth_vector_type (vectype);
3858 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3859 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3860 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3862 return tmp;
3866 /* Function vect_recog_mask_conversion_pattern
3868 Try to find statements which require boolean type
3869 conversion. Additional conversion statements are
3870 added to handle such cases. For example:
3872 bool m_1, m_2, m_3;
3873 int i_4, i_5;
3874 double d_6, d_7;
3875 char c_1, c_2, c_3;
3877 S1 m_1 = i_4 > i_5;
3878 S2 m_2 = d_6 < d_7;
3879 S3 m_3 = m_1 & m_2;
3880 S4 c_1 = m_3 ? c_2 : c_3;
3882 Will be transformed into:
3884 S1 m_1 = i_4 > i_5;
3885 S2 m_2 = d_6 < d_7;
3886 S3'' m_2' = (_Bool[bitsize=32])m_2
3887 S3' m_3' = m_1 & m_2';
3888 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3889 S4' c_1' = m_3'' ? c_2 : c_3; */
3891 static gimple *
3892 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3894 gimple *last_stmt = stmt_vinfo->stmt;
3895 enum tree_code rhs_code;
3896 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3897 tree vectype1, vectype2;
3898 stmt_vec_info pattern_stmt_info;
3899 vec_info *vinfo = stmt_vinfo->vinfo;
3901 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3902 if (is_gimple_call (last_stmt)
3903 && gimple_call_internal_p (last_stmt))
3905 gcall *pattern_stmt;
3907 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3908 int mask_argno = internal_fn_mask_index (ifn);
3909 if (mask_argno < 0)
3910 return NULL;
3912 bool store_p = internal_store_fn_p (ifn);
3913 if (store_p)
3915 int rhs_index = internal_fn_stored_value_index (ifn);
3916 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3917 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3919 else
3921 lhs = gimple_call_lhs (last_stmt);
3922 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3925 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3926 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3927 if (!mask_arg_type)
3928 return NULL;
3929 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3931 if (!vectype1 || !vectype2
3932 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3933 TYPE_VECTOR_SUBPARTS (vectype2)))
3934 return NULL;
3936 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3938 auto_vec<tree, 8> args;
3939 unsigned int nargs = gimple_call_num_args (last_stmt);
3940 args.safe_grow (nargs);
3941 for (unsigned int i = 0; i < nargs; ++i)
3942 args[i] = ((int) i == mask_argno
3943 ? tmp
3944 : gimple_call_arg (last_stmt, i));
3945 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3947 if (!store_p)
3949 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3950 gimple_call_set_lhs (pattern_stmt, lhs);
3952 gimple_call_set_nothrow (pattern_stmt, true);
3954 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3955 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3957 STMT_VINFO_DATA_REF (pattern_stmt_info)
3958 = STMT_VINFO_DATA_REF (stmt_vinfo);
3959 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3960 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3961 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
3962 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
3965 *type_out = vectype1;
3966 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3968 return pattern_stmt;
3971 if (!is_gimple_assign (last_stmt))
3972 return NULL;
3974 gimple *pattern_stmt;
3975 lhs = gimple_assign_lhs (last_stmt);
3976 rhs1 = gimple_assign_rhs1 (last_stmt);
3977 rhs_code = gimple_assign_rhs_code (last_stmt);
3979 /* Check for cond expression requiring mask conversion. */
3980 if (rhs_code == COND_EXPR)
3982 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3984 if (TREE_CODE (rhs1) == SSA_NAME)
3986 rhs1_type = search_type_for_mask (rhs1, vinfo);
3987 if (!rhs1_type)
3988 return NULL;
3990 else if (COMPARISON_CLASS_P (rhs1))
3992 /* Check whether we're comparing scalar booleans and (if so)
3993 whether a better mask type exists than the mask associated
3994 with boolean-sized elements. This avoids unnecessary packs
3995 and unpacks if the booleans are set from comparisons of
3996 wider types. E.g. in:
3998 int x1, x2, x3, x4, y1, y2;
4000 bool b1 = (x1 == x2);
4001 bool b2 = (x3 == x4);
4002 ... = b1 == b2 ? y1 : y2;
4004 it is better for b1 and b2 to use the mask type associated
4005 with int elements rather than bool (byte) elements. */
4006 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4007 if (!rhs1_type)
4008 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4010 else
4011 return NULL;
4013 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4015 if (!vectype1 || !vectype2)
4016 return NULL;
4018 /* Continue if a conversion is needed. Also continue if we have
4019 a comparison whose vector type would normally be different from
4020 VECTYPE2 when considered in isolation. In that case we'll
4021 replace the comparison with an SSA name (so that we can record
4022 its vector type) and behave as though the comparison was an SSA
4023 name from the outset. */
4024 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4025 TYPE_VECTOR_SUBPARTS (vectype2))
4026 && (TREE_CODE (rhs1) == SSA_NAME
4027 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4028 return NULL;
4030 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4031 in place, we can handle it in vectorizable_condition. This avoids
4032 unnecessary promotion stmts and increased vectorization factor. */
4033 if (COMPARISON_CLASS_P (rhs1)
4034 && INTEGRAL_TYPE_P (rhs1_type)
4035 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4036 TYPE_VECTOR_SUBPARTS (vectype2)))
4038 enum vect_def_type dt;
4039 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4040 && dt == vect_external_def
4041 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4042 && (dt == vect_external_def
4043 || dt == vect_constant_def))
4045 tree wide_scalar_type = build_nonstandard_integer_type
4046 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4047 TYPE_UNSIGNED (rhs1_type));
4048 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4049 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4050 return NULL;
4054 /* If rhs1 is a comparison we need to move it into a
4055 separate statement. */
4056 if (TREE_CODE (rhs1) != SSA_NAME)
4058 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4059 pattern_stmt = gimple_build_assign (tmp, rhs1);
4060 rhs1 = tmp;
4061 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4064 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4065 TYPE_VECTOR_SUBPARTS (vectype2)))
4066 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4067 else
4068 tmp = rhs1;
4070 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4071 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4072 gimple_assign_rhs2 (last_stmt),
4073 gimple_assign_rhs3 (last_stmt));
4075 *type_out = vectype1;
4076 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4078 return pattern_stmt;
4081 /* Now check for binary boolean operations requiring conversion for
4082 one of the operands. */
4083 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4084 return NULL;
4086 if (rhs_code != BIT_IOR_EXPR
4087 && rhs_code != BIT_XOR_EXPR
4088 && rhs_code != BIT_AND_EXPR
4089 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4090 return NULL;
4092 rhs2 = gimple_assign_rhs2 (last_stmt);
4094 rhs1_type = search_type_for_mask (rhs1, vinfo);
4095 rhs2_type = search_type_for_mask (rhs2, vinfo);
4097 if (!rhs1_type || !rhs2_type
4098 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4099 return NULL;
4101 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4103 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4104 if (!vectype1)
4105 return NULL;
4106 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4108 else
4110 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4111 if (!vectype1)
4112 return NULL;
4113 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4116 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4117 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4119 *type_out = vectype1;
4120 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4122 return pattern_stmt;
4125 /* STMT_INFO is a load or store. If the load or store is conditional, return
4126 the boolean condition under which it occurs, otherwise return null. */
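/* For instance (illustrative only): a plain assignment load or store
   yields NULL_TREE here, whereas an if-converted access that became an
   internal .MASK_LOAD or .MASK_STORE call yields that call's mask
   argument.  */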
4128 static tree
4129 vect_get_load_store_mask (stmt_vec_info stmt_info)
4131 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4133 gcc_assert (gimple_assign_single_p (def_assign));
4134 return NULL_TREE;
4137 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4139 internal_fn ifn = gimple_call_internal_fn (def_call);
4140 int mask_index = internal_fn_mask_index (ifn);
4141 return gimple_call_arg (def_call, mask_index);
4144 gcc_unreachable ();
4147 /* Return the scalar offset type that an internal gather/scatter function
4148 should use. GS_INFO describes the gather/scatter operation. */
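/* A hypothetical example: for a gather of 64-bit elements whose offset
   variable is a 32-bit signed int, the offset is narrower than the
   elements, so a 64-bit signed integer type would be returned; an offset
   that is already 64 bits wide would be returned unchanged.  */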
4150 static tree
4151 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4153 tree offset_type = TREE_TYPE (gs_info->offset);
4154 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4156 /* Enforced by vect_check_gather_scatter. */
4157 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4158 gcc_assert (element_bits >= offset_bits);
4160 /* If the offset is narrower than the elements, extend it according
4161 to its sign. */
4162 if (element_bits > offset_bits)
4163 return build_nonstandard_integer_type (element_bits,
4164 TYPE_UNSIGNED (offset_type));
4166 return offset_type;
4169 /* Return MASK if MASK is suitable for masking an operation on vectors
4170 of type VECTYPE, otherwise convert it into such a form and return
4171 the result. Associate any conversion statements with STMT_INFO's
4172 pattern. */
4174 static tree
4175 vect_convert_mask_for_vectype (tree mask, tree vectype,
4176 stmt_vec_info stmt_info, vec_info *vinfo)
4178 tree mask_type = search_type_for_mask (mask, vinfo);
4179 if (mask_type)
4181 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4182 if (mask_vectype
4183 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4184 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4185 mask = build_mask_conversion (mask, vectype, stmt_info);
4187 return mask;
4190 /* Return the equivalent of:
4192 fold_convert (TYPE, VALUE)
4194 with the expectation that the operation will be vectorized.
4195 If new statements are needed, add them as pattern statements
4196 to STMT_INFO. */
4198 static tree
4199 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4201 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4202 return value;
4204 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4205 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4206 append_pattern_def_seq (stmt_info, conversion,
4207 get_vectype_for_scalar_type (type));
4208 return new_value;
4211 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4212 internal function. Return the final statement on success and set
4213 *TYPE_OUT to the vector type being loaded or stored.
4215 This function only handles gathers and scatters that were recognized
4216 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
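/* For illustration only (a sketch, not lifted from the sources): a gather
   load of the form y[idx[i]], conditional on some mask, would be replaced
   by a pattern call along the lines of

     ... = .MASK_GATHER_LOAD (base, offset, scale, mask);

   where base, offset and scale come from vect_check_gather_scatter and
   the mask has been converted to suit the loaded vector type; the
   unconditional and scatter-store cases are analogous.  */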
4218 static gimple *
4219 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4221 /* Currently we only support this for loop vectorization. */
4222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4223 if (!loop_vinfo)
4224 return NULL;
4226 /* Make sure that we're looking at a gather load or scatter store. */
4227 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4228 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4229 return NULL;
4231 /* Get the boolean that controls whether the load or store happens.
4232 This is null if the operation is unconditional. */
4233 tree mask = vect_get_load_store_mask (stmt_info);
4235 /* Make sure that the target supports an appropriate internal
4236 function for the gather/scatter operation. */
4237 gather_scatter_info gs_info;
4238 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4239 || gs_info.decl)
4240 return NULL;
4242 /* Convert the mask to the right form. */
4243 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4244 if (mask)
4245 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4246 loop_vinfo);
4248 /* Get the invariant base and non-invariant offset, converting the
4249 latter to the same width as the vector elements. */
4250 tree base = gs_info.base;
4251 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4252 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4253 stmt_info);
4255 /* Build the new pattern statement. */
4256 tree scale = size_int (gs_info.scale);
4257 gcall *pattern_stmt;
4258 if (DR_IS_READ (dr))
4260 if (mask != NULL)
4261 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4262 offset, scale, mask);
4263 else
4264 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4265 offset, scale);
4266 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4267 gimple_call_set_lhs (pattern_stmt, load_lhs);
4269 else
4271 tree rhs = vect_get_store_rhs (stmt_info);
4272 if (mask != NULL)
4273 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4274 base, offset, scale, rhs,
4275 mask);
4276 else
4277 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4278 base, offset, scale, rhs);
4280 gimple_call_set_nothrow (pattern_stmt, true);
4282 /* Copy across relevant vectorization info and associate DR with the
4283 new pattern statement instead of the original statement. */
4284 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4285 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4286 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4287 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4288 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4289 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4291 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4292 *type_out = vectype;
4293 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4295 return pattern_stmt;
4298 /* Return true if TYPE is a non-boolean integer type. These are the types
4299 that we want to consider for narrowing. */
4301 static bool
4302 vect_narrowable_type_p (tree type)
4304 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4307 /* Return true if the operation given by CODE can be truncated to N bits
4308 when only N bits of the output are needed. This is only true if bit N+1
4309 of the inputs has no effect on the low N bits of the result. */
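/* For example (illustrative): keeping only the low 8 bits of a PLUS_EXPR
   needs only the low 8 bits of its inputs, because

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   whereas right shifts are not listed below, since their low output bits
   depend on higher input bits.  */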
4311 static bool
4312 vect_truncatable_operation_p (tree_code code)
4314 switch (code)
4316 case PLUS_EXPR:
4317 case MINUS_EXPR:
4318 case MULT_EXPR:
4319 case BIT_AND_EXPR:
4320 case BIT_IOR_EXPR:
4321 case BIT_XOR_EXPR:
4322 case COND_EXPR:
4323 return true;
4325 default:
4326 return false;
4330 /* Record that STMT_INFO could be changed from operating on TYPE to
4331 operating on a type with the precision and sign given by PRECISION
4332 and SIGN respectively. PRECISION is an arbitrary bit precision;
4333 it might not be a whole number of bytes. */
4335 static void
4336 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4337 unsigned int precision, signop sign)
4339 /* Round the precision up to a whole number of bytes. */
4340 precision = vect_element_precision (precision);
4341 if (precision < TYPE_PRECISION (type)
4342 && (!stmt_info->operation_precision
4343 || stmt_info->operation_precision > precision))
4345 stmt_info->operation_precision = precision;
4346 stmt_info->operation_sign = sign;
4350 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4351 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4352 is an arbitrary bit precision; it might not be a whole number of bytes. */
4354 static void
4355 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4356 unsigned int min_input_precision)
4358 /* This operation in isolation only requires the inputs to have
4359 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4360 that MIN_INPUT_PRECISION is a natural precision for the chain
4361 as a whole. E.g. consider something like:
4363 unsigned short *x, *y;
4364 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4366 The right shift can be done on unsigned chars, and only requires the
4367 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4368 approach would mean turning a natural chain of single-vector unsigned
4369 short operations into one that truncates "*x" and then extends
4370 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4371 operation and one vector for each unsigned char operation.
4372 This would be a significant pessimization.
4374 Instead only propagate the maximum of this precision and the precision
4375 required by the users of the result. This means that we don't pessimize
4376 the case above but continue to optimize things like:
4378 unsigned char *y;
4379 unsigned short *x;
4380 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4382 Here we would truncate two vectors of *x to a single vector of
4383 unsigned chars and use single-vector unsigned char operations for
4384 everything else, rather than doing two unsigned short copies of
4385 "(*x & 0xf0) >> 4" and then truncating the result. */
4386 min_input_precision = MAX (min_input_precision,
4387 stmt_info->min_output_precision);
4389 if (min_input_precision < TYPE_PRECISION (type)
4390 && (!stmt_info->min_input_precision
4391 || stmt_info->min_input_precision > min_input_precision))
4392 stmt_info->min_input_precision = min_input_precision;
4395 /* Subroutine of vect_determine_min_output_precision. Return true if
4396 we can calculate a reduced number of output bits for STMT_INFO,
4397 whose result is LHS. */
4399 static bool
4400 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4402 /* Take the maximum precision required by users of the result. */
4403 vec_info *vinfo = stmt_info->vinfo;
4404 unsigned int precision = 0;
4405 imm_use_iterator iter;
4406 use_operand_p use;
4407 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4409 gimple *use_stmt = USE_STMT (use);
4410 if (is_gimple_debug (use_stmt))
4411 continue;
4412 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4413 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4414 return false;
4415 precision = MAX (precision, use_stmt_info->min_input_precision);
4418 if (dump_enabled_p ())
4420 dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
4421 precision);
4422 dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
4423 dump_printf (MSG_NOTE, " are significant\n");
4425 stmt_info->min_output_precision = precision;
4426 return true;
4429 /* Calculate min_output_precision for STMT_INFO. */
4431 static void
4432 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4434 /* We're only interested in statements with a narrowable result. */
4435 tree lhs = gimple_get_lhs (stmt_info->stmt);
4436 if (!lhs
4437 || TREE_CODE (lhs) != SSA_NAME
4438 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4439 return;
4441 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4442 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4445 /* Use range information to decide whether STMT (described by STMT_INFO)
4446 could be done in a narrower type. This is effectively a forward
4447 propagation, since it uses context-independent information that applies
4448 to all users of an SSA name. */
4450 static void
4451 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4453 tree lhs = gimple_assign_lhs (stmt);
4454 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4455 return;
4457 tree type = TREE_TYPE (lhs);
4458 if (!vect_narrowable_type_p (type))
4459 return;
4461 /* First see whether we have any useful range information for the result. */
4462 unsigned int precision = TYPE_PRECISION (type);
4463 signop sign = TYPE_SIGN (type);
4464 wide_int min_value, max_value;
4465 if (!vect_get_range_info (lhs, &min_value, &max_value))
4466 return;
4468 tree_code code = gimple_assign_rhs_code (stmt);
4469 unsigned int nops = gimple_num_ops (stmt);
4471 if (!vect_truncatable_operation_p (code))
4472 /* Check that all relevant input operands are compatible, and update
4473 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4474 for (unsigned int i = 1; i < nops; ++i)
4476 tree op = gimple_op (stmt, i);
4477 if (TREE_CODE (op) == INTEGER_CST)
4479 /* Don't require the integer to have RHS_TYPE (which it might
4480 not for things like shift amounts, etc.), but do require it
4481 to fit the type. */
4482 if (!int_fits_type_p (op, type))
4483 return;
4485 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4486 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4488 else if (TREE_CODE (op) == SSA_NAME)
4490 /* Ignore codes that don't take uniform arguments. */
4491 if (!types_compatible_p (TREE_TYPE (op), type))
4492 return;
4494 wide_int op_min_value, op_max_value;
4495 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4496 return;
4498 min_value = wi::min (min_value, op_min_value, sign);
4499 max_value = wi::max (max_value, op_max_value, sign);
4501 else
4502 return;
4505 /* Try to switch signed types for unsigned types if we can.
4506 This is better for two reasons. First, unsigned ops tend
4507 to be cheaper than signed ops. Second, it means that we can
4508 handle things like:
4510 signed char c;
4511 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4515 signed char c;
4516 unsigned short res_1 = (unsigned short) c & 0xff00;
4517 int res = (int) res_1;
4519 where the intermediate result res_1 has unsigned rather than
4520 signed type. */
4521 if (sign == SIGNED && !wi::neg_p (min_value))
4522 sign = UNSIGNED;
4524 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4525 unsigned int precision1 = wi::min_precision (min_value, sign);
4526 unsigned int precision2 = wi::min_precision (max_value, sign);
4527 unsigned int value_precision = MAX (precision1, precision2);
4528 if (value_precision >= precision)
4529 return;
4531 if (dump_enabled_p ())
4533 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4534 " without loss of precision: ",
4535 sign == SIGNED ? "signed" : "unsigned",
4536 value_precision);
4537 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4540 vect_set_operation_type (stmt_info, type, value_precision, sign);
4541 vect_set_min_input_precision (stmt_info, type, value_precision);
4544 /* Use information about the users of STMT's result to decide whether
4545 STMT (described by STMT_INFO) could be done in a narrower type.
4546 This is effectively a backward propagation. */
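/* A hypothetical example: in

     unsigned char *r;
     unsigned int x, y;
     *r = (unsigned char) (x + y);

   the conversion only needs the low 8 bits of the addition, so the
   addition can be recorded as a byte-precision operation and only the
   low 8 bits of x and y are required as inputs.  */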
4548 static void
4549 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4551 tree_code code = gimple_assign_rhs_code (stmt);
4552 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4553 tree type = TREE_TYPE (gimple_op (stmt, opno));
4554 if (!vect_narrowable_type_p (type))
4555 return;
4557 unsigned int precision = TYPE_PRECISION (type);
4558 unsigned int operation_precision, min_input_precision;
4559 switch (code)
4561 CASE_CONVERT:
4562 /* Only the bits that contribute to the output matter. Don't change
4563 the precision of the operation itself. */
4564 operation_precision = precision;
4565 min_input_precision = stmt_info->min_output_precision;
4566 break;
4568 case LSHIFT_EXPR:
4569 case RSHIFT_EXPR:
4571 tree shift = gimple_assign_rhs2 (stmt);
4572 if (TREE_CODE (shift) != INTEGER_CST
4573 || !wi::ltu_p (wi::to_widest (shift), precision))
4574 return;
4575 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4576 if (code == LSHIFT_EXPR)
4578 /* We need CONST_SHIFT fewer bits of the input. */
4579 operation_precision = stmt_info->min_output_precision;
4580 min_input_precision = (MAX (operation_precision, const_shift)
4581 - const_shift);
4583 else
4585 /* We need CONST_SHIFT extra bits to do the operation. */
4586 operation_precision = (stmt_info->min_output_precision
4587 + const_shift);
4588 min_input_precision = operation_precision;
4590 break;
4593 default:
4594 if (vect_truncatable_operation_p (code))
4596 /* Input bit N has no effect on output bits N-1 and lower. */
4597 operation_precision = stmt_info->min_output_precision;
4598 min_input_precision = operation_precision;
4599 break;
4601 return;
4604 if (operation_precision < precision)
4606 if (dump_enabled_p ())
4608 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4609 " without affecting users: ",
4610 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4611 operation_precision);
4612 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4614 vect_set_operation_type (stmt_info, type, operation_precision,
4615 TYPE_SIGN (type));
4617 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4620 /* Handle vect_determine_precisions for STMT_INFO, given that we
4621 have already done so for the users of its result. */
4623 void
4624 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4626 vect_determine_min_output_precision (stmt_info);
4627 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4629 vect_determine_precisions_from_range (stmt_info, stmt);
4630 vect_determine_precisions_from_users (stmt_info, stmt);
4634 /* Walk backwards through the vectorizable region to determine the
4635 values of these fields:
4637 - min_output_precision
4638 - min_input_precision
4639 - operation_precision
4640 - operation_sign. */
4642 void
4643 vect_determine_precisions (vec_info *vinfo)
4645 DUMP_VECT_SCOPE ("vect_determine_precisions");
4647 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4649 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4650 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4651 unsigned int nbbs = loop->num_nodes;
4653 for (unsigned int i = 0; i < nbbs; i++)
4655 basic_block bb = bbs[nbbs - i - 1];
4656 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4657 !gsi_end_p (si); gsi_prev (&si))
4658 vect_determine_stmt_precisions
4659 (vinfo->lookup_stmt (gsi_stmt (si)));
4662 else
4664 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4665 gimple_stmt_iterator si = bb_vinfo->region_end;
4666 gimple *stmt;
4669 if (!gsi_stmt (si))
4670 si = gsi_last_bb (bb_vinfo->bb);
4671 else
4672 gsi_prev (&si);
4673 stmt = gsi_stmt (si);
4674 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4675 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4676 vect_determine_stmt_precisions (stmt_info);
4678 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4682 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4684 struct vect_recog_func
4686 vect_recog_func_ptr fn;
4687 const char *name;
4690 /* Note that ordering matters - the first pattern that matches a stmt is
4691 taken, which usually means the more complex patterns need to precede the
4692 less complex ones (widen_sum only after dot_prod or sad for example). */
4693 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4694 { vect_recog_over_widening_pattern, "over_widening" },
4695 /* Must come after over_widening, which narrows the shift as much as
4696 possible beforehand. */
4697 { vect_recog_average_pattern, "average" },
4698 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4699 { vect_recog_widen_mult_pattern, "widen_mult" },
4700 { vect_recog_dot_prod_pattern, "dot_prod" },
4701 { vect_recog_sad_pattern, "sad" },
4702 { vect_recog_widen_sum_pattern, "widen_sum" },
4703 { vect_recog_pow_pattern, "pow" },
4704 { vect_recog_widen_shift_pattern, "widen_shift" },
4705 { vect_recog_rotate_pattern, "rotate" },
4706 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4707 { vect_recog_divmod_pattern, "divmod" },
4708 { vect_recog_mult_pattern, "mult" },
4709 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4710 { vect_recog_bool_pattern, "bool" },
4711 /* This must come before mask conversion, and includes the parts
4712 of mask conversion that are needed for gather and scatter
4713 internal functions. */
4714 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4715 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4718 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4720 /* Mark statements that are involved in a pattern. */
4722 static inline void
4723 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4724 tree pattern_vectype)
4726 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4728 gimple *orig_pattern_stmt = NULL;
4729 if (is_pattern_stmt_p (orig_stmt_info))
4731 /* We're replacing a statement in an existing pattern definition
4732 sequence. */
4733 orig_pattern_stmt = orig_stmt_info->stmt;
4734 if (dump_enabled_p ())
4736 dump_printf_loc (MSG_NOTE, vect_location,
4737 "replacing earlier pattern ");
4738 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_pattern_stmt, 0);
4741 /* To keep the book-keeping simple, just swap the lhs of the
4742 old and new statements, so that the old one has a valid but
4743 unused lhs. */
4744 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4745 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4746 gimple_set_lhs (pattern_stmt, old_lhs);
4748 if (dump_enabled_p ())
4750 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4751 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4754 /* Switch to the statement that ORIG replaces. */
4755 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4757 /* We shouldn't be replacing the main pattern statement. */
4758 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4759 != orig_pattern_stmt);
4762 if (def_seq)
4763 for (gimple_stmt_iterator si = gsi_start (def_seq);
4764 !gsi_end_p (si); gsi_next (&si))
4765 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4767 if (orig_pattern_stmt)
4769 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4771 /* Insert all the new pattern statements before the original one. */
4772 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4773 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4774 orig_def_seq);
4775 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4776 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4778 /* Remove the pattern statement that this new pattern replaces. */
4779 gsi_remove (&gsi, false);
4781 else
4782 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4785 /* Function vect_pattern_recog_1
4787 Input:
4788 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4789 computation pattern.
4790 STMT_INFO: A stmt from which the pattern search should start.
4792 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4793 a sequence of statements that has the same functionality and can be
4794 used to replace STMT_INFO. It returns the last statement in the sequence
4795 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4796 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4797 statement, having first checked that the target supports the new operation
4798 in that type.
4800 This function also does some bookkeeping, as explained in the documentation
4801 for vect_pattern_recog. */
4803 static void
4804 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4806 vec_info *vinfo = stmt_info->vinfo;
4807 gimple *pattern_stmt;
4808 loop_vec_info loop_vinfo;
4809 tree pattern_vectype;
4811 /* If this statement has already been replaced with pattern statements,
4812 leave the original statement alone, since the first match wins.
4813 Instead try to match against the definition statements that feed
4814 the main pattern statement. */
4815 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4817 gimple_stmt_iterator gsi;
4818 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4819 !gsi_end_p (gsi); gsi_next (&gsi))
4820 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4821 return;
4824 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4825 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4826 if (!pattern_stmt)
4828 /* Clear any half-formed pattern definition sequence. */
4829 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4830 return;
4833 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4834 gcc_assert (pattern_vectype);
4836 /* Found a vectorizable pattern. */
4837 if (dump_enabled_p ())
4839 dump_printf_loc (MSG_NOTE, vect_location,
4840 "%s pattern recognized: ", recog_func->name);
4841 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4844 /* Mark the stmts that are involved in the pattern. */
4845 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4847 /* Patterns cannot be vectorized using SLP, because they change the order of
4848 computation. */
4849 if (loop_vinfo)
4851 unsigned ix, ix2;
4852 stmt_vec_info *elem_ptr;
4853 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4854 elem_ptr, *elem_ptr == stmt_info);
4859 /* Function vect_pattern_recog
4861 Input:
4862 VINFO - vectorization info for the loop or basic-block region in which
4863 we want to look for computation idioms.
4865 Output - for each computation idiom that is detected we create a new stmt
4866 that provides the same functionality and that can be vectorized. We
4867 also record some information in the stmt_vec_info of the relevant
4868 stmts, as explained below:
4870 At the entry to this function we have the following stmts, with the
4871 following initial value in the STMT_VINFO fields:
4873 stmt in_pattern_p related_stmt vec_stmt
4874 S1: a_i = .... - - -
4875 S2: a_2 = ..use(a_i).. - - -
4876 S3: a_1 = ..use(a_2).. - - -
4877 S4: a_0 = ..use(a_1).. - - -
4878 S5: ... = ..use(a_0).. - - -
4880 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4881 represented by a single stmt. We then:
4882 - create a new stmt S6 equivalent to the pattern (the stmt is not
4883 inserted into the code)
4884 - fill in the STMT_VINFO fields as follows:
4886 in_pattern_p related_stmt vec_stmt
4887 S1: a_i = .... - - -
4888 S2: a_2 = ..use(a_i).. - - -
4889 S3: a_1 = ..use(a_2).. - - -
4890 S4: a_0 = ..use(a_1).. true S6 -
4891 '---> S6: a_new = .... - S4 -
4892 S5: ... = ..use(a_0).. - - -
4894 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4895 to each other through the RELATED_STMT field).
4897 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4898 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4899 remain irrelevant unless used by stmts other than S4.
4901 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4902 (because they are marked as irrelevant). It will vectorize S6, and record
4903 a pointer to the new vector stmt VS6 from S6 (as usual).
4904 S4 will be skipped, and S5 will be vectorized as usual:
4906 in_pattern_p related_stmt vec_stmt
4907 S1: a_i = .... - - -
4908 S2: a_2 = ..use(a_i).. - - -
4909 S3: a_1 = ..use(a_2).. - - -
4910 > VS6: va_new = .... - - -
4911 S4: a_0 = ..use(a_1).. true S6 VS6
4912 '---> S6: a_new = .... - S4 VS6
4913 > VS5: ... = ..vuse(va_new).. - - -
4914 S5: ... = ..use(a_0).. - - -
4916 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4917 elsewhere), and we'll end up with:
4919 VS6: va_new = ....
4920 VS5: ... = ..vuse(va_new)..
4922 In case of more than one pattern statement, e.g., widen-mult with
4923 intermediate type:
4925 S1 a_t = ;
4926 S2 a_T = (TYPE) a_t;
4927 '--> S3: a_it = (interm_type) a_t;
4928 S4 prod_T = a_T * CONST;
4929 '--> S5: prod_T' = a_it w* CONST;
4931 there may be other users of a_T outside the pattern. In that case S2 will
4932 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4933 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4934 be recorded in S3. */
4936 void
4937 vect_pattern_recog (vec_info *vinfo)
4939 struct loop *loop;
4940 basic_block *bbs;
4941 unsigned int nbbs;
4942 gimple_stmt_iterator si;
4943 unsigned int i, j;
4945 vect_determine_precisions (vinfo);
4947 DUMP_VECT_SCOPE ("vect_pattern_recog");
4949 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4951 loop = LOOP_VINFO_LOOP (loop_vinfo);
4952 bbs = LOOP_VINFO_BBS (loop_vinfo);
4953 nbbs = loop->num_nodes;
4955 /* Scan through the loop stmts, applying the pattern recognition
4956 functions starting at each stmt visited: */
4957 for (i = 0; i < nbbs; i++)
4959 basic_block bb = bbs[i];
4960 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4962 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4963 /* Scan over all generic vect_recog_xxx_pattern functions. */
4964 for (j = 0; j < NUM_PATTERNS; j++)
4965 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4966 stmt_info);
4970 else
4972 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4973 for (si = bb_vinfo->region_begin;
4974 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4976 gimple *stmt = gsi_stmt (si);
4977 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4978 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4979 continue;
4981 /* Scan over all generic vect_recog_xxx_pattern functions. */
4982 for (j = 0; j < NUM_PATTERNS; j++)
4983 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);