[15/n] PR85694: Try to split existing casts in widened patterns
[official-gcc.git] / gcc / tree-vect-patterns.c
blob a1649d8b0fec5b5486522de83dfd546b2e6ce7c6
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
56 value_range_type vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
71 return true;
73 else
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
80 return false;
84 /* Report that we've found an instance of pattern PATTERN in
85 statement STMT. */
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
93 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO.
98 Set its vector type to VECTYPE if it doesn't have one already. */
100 static void
101 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
102 tree vectype)
104 stmt_vec_info pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
105 if (pattern_stmt_info == NULL)
107 pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
108 orig_stmt_info->vinfo);
109 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
111 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
113 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info->stmt;
114 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
115 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
116 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
117 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
120 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
121 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
122 have one already. */
124 static void
125 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
126 tree vectype)
128 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
129 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
130 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
133 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
134 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
135 be different from the vector type of the final pattern statement. */
137 static inline void
138 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
139 tree vectype = NULL_TREE)
141 vec_info *vinfo = stmt_info->vinfo;
142 if (vectype)
144 gcc_assert (!vinfo_for_stmt (new_stmt));
145 stmt_vec_info new_stmt_info = new_stmt_vec_info (new_stmt, vinfo);
146 set_vinfo_for_stmt (new_stmt, new_stmt_info);
147 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
149 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
150 new_stmt);
153 static inline void
154 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
156 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
157 append_pattern_def_seq (stmt_info, stmt);
160 /* The caller wants to perform new operations on vect_external variable
161 VAR, so that the result of the operations would also be vect_external.
162 Return the edge on which the operations can be performed, if one exists.
163 Return null if the operations should instead be treated as part of
164 the pattern that needs them. */
166 static edge
167 vect_get_external_def_edge (vec_info *vinfo, tree var)
169 edge e = NULL;
170 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
172 e = loop_preheader_edge (loop_vinfo->loop);
173 if (!SSA_NAME_IS_DEFAULT_DEF (var))
175 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
176 if (bb == NULL
177 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
178 e = NULL;
181 return e;
184 /* Return true if the target supports a vector version of CODE,
185 where CODE is known to map to a direct optab. ITYPE specifies
186 the type of (some of) the scalar inputs and OTYPE specifies the
187 type of the scalar result.
189 If CODE allows the inputs and outputs to have different types
190 (such as for WIDEN_SUM_EXPR), it is the input mode rather
191 than the output mode that determines the appropriate target pattern.
192 Operand 0 of the target pattern then specifies the mode that the output
193 must have.
195 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
196 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
197 is nonnull. */
199 static bool
200 vect_supportable_direct_optab_p (tree otype, tree_code code,
201 tree itype, tree *vecotype_out,
202 tree *vecitype_out = NULL)
204 tree vecitype = get_vectype_for_scalar_type (itype);
205 if (!vecitype)
206 return false;
208 tree vecotype = get_vectype_for_scalar_type (otype);
209 if (!vecotype)
210 return false;
212 optab optab = optab_for_tree_code (code, vecitype, optab_default);
213 if (!optab)
214 return false;
216 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
217 if (icode == CODE_FOR_nothing
218 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
219 return false;
221 *vecotype_out = vecotype;
222 if (vecitype_out)
223 *vecitype_out = vecitype;
224 return true;
227 /* Round bit precision PRECISION up to a full element. */
229 static unsigned int
230 vect_element_precision (unsigned int precision)
232 precision = 1 << ceil_log2 (precision);
233 return MAX (precision, BITS_PER_UNIT);
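/* Worked example (illustrative, assuming BITS_PER_UNIT == 8):
   vect_element_precision (3) == 8, vect_element_precision (12) == 16 and
   vect_element_precision (17) == 32, i.e. the precision is rounded up to
   a power of two and never drops below a full byte.  */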
236 /* If OP is defined by a statement that's being considered for vectorization,
237 return information about that statement, otherwise return NULL. */
239 static stmt_vec_info
240 vect_get_internal_def (vec_info *vinfo, tree op)
242 vect_def_type dt;
243 gimple *def_stmt;
244 if (TREE_CODE (op) != SSA_NAME
245 || !vect_is_simple_use (op, vinfo, &dt, &def_stmt)
246 || dt != vect_internal_def)
247 return NULL;
249 return vinfo_for_stmt (def_stmt);
252 /* Check whether NAME, an ssa-name used in USE_STMT,
253 is a result of a type promotion, such that:
254 DEF_STMT: NAME = NOP (name0)
255 If CHECK_SIGN is TRUE, check that either both types are signed or both are
256 unsigned. */
258 static bool
259 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
260 tree *orig_type, gimple **def_stmt, bool *promotion)
262 stmt_vec_info stmt_vinfo;
263 tree type = TREE_TYPE (name);
264 tree oprnd0;
265 enum vect_def_type dt;
267 stmt_vinfo = vinfo_for_stmt (use_stmt);
268 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, def_stmt))
269 return false;
271 if (dt != vect_internal_def
272 && dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 if (!*def_stmt)
276 return false;
278 if (!is_gimple_assign (*def_stmt))
279 return false;
281 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
282 return false;
284 oprnd0 = gimple_assign_rhs1 (*def_stmt);
286 *orig_type = TREE_TYPE (oprnd0);
287 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
288 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
289 return false;
291 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
292 *promotion = true;
293 else
294 *promotion = false;
296 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
297 return false;
299 return true;
302 /* Holds information about an input operand after some sign changes
303 and type promotions have been peeled away. */
304 struct vect_unpromoted_value {
305 vect_unpromoted_value ();
307 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
309 /* The value obtained after peeling away zero or more casts. */
310 tree op;
312 /* The type of OP. */
313 tree type;
315 /* The definition type of OP. */
316 vect_def_type dt;
318 /* If OP is the result of peeling at least one cast, and if the cast
319 of OP itself is a vectorizable statement, CASTER identifies that
320 statement, otherwise it is null. */
321 stmt_vec_info caster;
324 inline vect_unpromoted_value::vect_unpromoted_value ()
325 : op (NULL_TREE),
326 type (NULL_TREE),
327 dt (vect_uninitialized_def),
328 caster (NULL)
332 /* Set the operand to OP_IN, its definition type to DT_IN, and the
333 statement that casts it to CASTER_IN. */
335 inline void
336 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
337 stmt_vec_info caster_in)
339 op = op_in;
340 type = TREE_TYPE (op);
341 dt = dt_in;
342 caster = caster_in;
345 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
346 to reach some vectorizable inner operand OP', continuing as long as it
347 is possible to convert OP' back to OP using a possible sign change
348 followed by a possible promotion P. Return this OP', or null if OP is
349 not a vectorizable SSA name. If there is a promotion P, describe its
350 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
351 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
352 have more than one user.
354 A successful return means that it is possible to go from OP' to OP
355 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
356 whereas the cast from UNPROM to OP might be a promotion, a sign
357 change, or a nop.
359 E.g. say we have:
361 signed short *ptr = ...;
362 signed short C = *ptr;
363 unsigned short B = (unsigned short) C; // sign change
364 signed int A = (signed int) B; // unsigned promotion
365 ...possible other uses of A...
366 unsigned int OP = (unsigned int) A; // sign change
368 In this case it's possible to go directly from C to OP using:
370 OP = (unsigned int) (unsigned short) C;
371 +------------+ +--------------+
372 promotion sign change
374 so OP' would be C. The input to the promotion is B, so UNPROM
375 would describe B. */
377 static tree
378 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
379 vect_unpromoted_value *unprom,
380 bool *single_use_p = NULL)
382 tree res = NULL_TREE;
383 tree op_type = TREE_TYPE (op);
384 unsigned int orig_precision = TYPE_PRECISION (op_type);
385 stmt_vec_info caster = NULL;
386 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
388 /* See whether OP is simple enough to vectorize. */
389 gimple *def_stmt;
390 vect_def_type dt;
391 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt))
392 break;
394 /* If OP is the input of a demotion, skip over it to see whether
395 OP is itself the result of a promotion. If so, the combined
396 effect of the promotion and the demotion might fit the required
397 pattern, otherwise neither operation fits.
399 This copes with cases such as the result of an arithmetic
400 operation being truncated before being stored, and where that
401 arithmetic operation has been recognized as an over-widened one. */
402 if (TYPE_PRECISION (op_type) <= orig_precision)
404 /* Use OP as the UNPROM described above if we haven't yet
405 found a promotion, or if using the new input preserves the
406 sign of the previous promotion. */
407 if (!res
408 || TYPE_PRECISION (unprom->type) == orig_precision
409 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
410 unprom->set_op (op, dt, caster);
411 /* Stop if we've already seen a promotion and if this
412 conversion does more than change the sign. */
413 else if (TYPE_PRECISION (op_type)
414 != TYPE_PRECISION (unprom->type))
415 break;
417 /* The sequence now extends to OP. */
418 res = op;
421 /* See whether OP is defined by a cast. Record it as CASTER if
422 the cast is potentially vectorizable. */
423 if (!def_stmt)
424 break;
425 if (dt == vect_internal_def)
427 caster = vinfo_for_stmt (def_stmt);
428 /* Ignore pattern statements, since we don't link uses for them. */
429 if (single_use_p
430 && !STMT_VINFO_RELATED_STMT (caster)
431 && !has_single_use (res))
432 *single_use_p = false;
434 else
435 caster = NULL;
436 gassign *assign = dyn_cast <gassign *> (def_stmt);
437 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
438 break;
440 /* Continue with the input to the cast. */
441 op = gimple_assign_rhs1 (def_stmt);
442 op_type = TREE_TYPE (op);
444 return res;
447 /* OP is an integer operand to an operation that returns TYPE, and we
448 want to treat the operation as a widening one. So far we can treat
449 it as widening from *COMMON_TYPE.
451 Return true if OP is suitable for such a widening operation,
452 either widening from *COMMON_TYPE or from some supertype of it.
453 Update *COMMON_TYPE to the supertype in the latter case.
455 SHIFT_P is true if OP is a shift amount. */
457 static bool
458 vect_joust_widened_integer (tree type, bool shift_p, tree op,
459 tree *common_type)
461 /* Calculate the minimum precision required by OP, without changing
462 the sign of either operand. */
463 unsigned int precision;
464 if (shift_p)
466 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
467 return false;
468 precision = TREE_INT_CST_LOW (op);
470 else
472 precision = wi::min_precision (wi::to_widest (op),
473 TYPE_SIGN (*common_type));
474 if (precision * 2 > TYPE_PRECISION (type))
475 return false;
478 /* If OP requires a wider type, switch to that type. The checks
479 above ensure that this is still narrower than the result. */
480 precision = vect_element_precision (precision);
481 if (TYPE_PRECISION (*common_type) < precision)
482 *common_type = build_nonstandard_integer_type
483 (precision, TYPE_UNSIGNED (*common_type));
484 return true;
487 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
488 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
490 static bool
491 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
493 if (types_compatible_p (*common_type, new_type))
494 return true;
496 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
497 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
498 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
499 return true;
501 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
502 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
503 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
505 *common_type = new_type;
506 return true;
509 /* We have mismatched signs, with the signed type being
510 no wider than the unsigned type. In this case we need
511 a wider signed type. */
512 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
513 TYPE_PRECISION (new_type));
514 precision *= 2;
515 if (precision * 2 > TYPE_PRECISION (type))
516 return false;
518 *common_type = build_nonstandard_integer_type (precision, false);
519 return true;
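/* Worked example (illustrative): if *COMMON_TYPE is "unsigned char" and
   NEW_TYPE is "signed char", neither type can hold all values of the
   other, so the code falls through to the mismatched-sign case and asks
   for a 16-bit signed type.  Because of the final
   "precision * 2 > TYPE_PRECISION (type)" check, that only succeeds when
   TYPE is at least 32 bits wide.  */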
522 /* Check whether STMT_INFO can be viewed as a tree of integer operations
523 in which each node either performs CODE or WIDENED_CODE, and where
524 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
525 specifies the maximum number of leaf operands. SHIFT_P says whether
526 CODE and WIDENED_CODE are some sort of shift.
528 If STMT_INFO is such a tree, return the number of leaf operands
529 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
530 to a type that (a) is narrower than the result of STMT_INFO and
531 (b) can hold all leaf operand values.
533 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
534 exists. */
536 static unsigned int
537 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
538 tree_code widened_code, bool shift_p,
539 unsigned int max_nops,
540 vect_unpromoted_value *unprom, tree *common_type)
542 /* Check for an integer operation with the right code. */
543 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
544 if (!assign)
545 return 0;
547 tree_code rhs_code = gimple_assign_rhs_code (assign);
548 if (rhs_code != code && rhs_code != widened_code)
549 return 0;
551 tree type = gimple_expr_type (assign);
552 if (!INTEGRAL_TYPE_P (type))
553 return 0;
555 /* Assume that both operands will be leaf operands. */
556 max_nops -= 2;
558 /* Check the operands. */
559 unsigned int next_op = 0;
560 for (unsigned int i = 0; i < 2; ++i)
562 vect_unpromoted_value *this_unprom = &unprom[next_op];
563 unsigned int nops = 1;
564 tree op = gimple_op (assign, i + 1);
565 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
567 /* We already have a common type from earlier operands.
568 Update it to account for OP. */
569 this_unprom->set_op (op, vect_constant_def);
570 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
571 return 0;
573 else
575 /* Only allow shifts by constants. */
576 if (shift_p && i == 1)
577 return 0;
579 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
580 this_unprom))
581 return 0;
583 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
585 /* The operand isn't widened. If STMT_INFO has the code
586 for an unwidened operation, recursively check whether
587 this operand is a node of the tree. */
588 if (rhs_code != code
589 || max_nops == 0
590 || this_unprom->dt != vect_internal_def)
591 return 0;
593 /* Give back the leaf slot allocated above now that we're
594 not treating this as a leaf operand. */
595 max_nops += 1;
597 /* Recursively process the definition of the operand. */
598 stmt_vec_info def_stmt_info
599 = vinfo_for_stmt (SSA_NAME_DEF_STMT (this_unprom->op));
600 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
601 shift_p, max_nops, this_unprom,
602 common_type);
603 if (nops == 0)
604 return 0;
606 max_nops -= nops;
608 else
610 /* Make sure that the operand is narrower than the result. */
611 if (TYPE_PRECISION (this_unprom->type) * 2
612 > TYPE_PRECISION (type))
613 return 0;
615 /* Update COMMON_TYPE for the new operand. */
616 if (i == 0)
617 *common_type = this_unprom->type;
618 else if (!vect_joust_widened_type (type, this_unprom->type,
619 common_type))
620 return 0;
623 next_op += nops;
625 return next_op;
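/* Illustrative use (hypothetical input): for

     short a, b;
     int res = (int) a * (int) b;

   a call on the multiplication with CODE == MULT_EXPR,
   WIDENED_CODE == WIDEN_MULT_EXPR and MAX_NOPS == 2 returns 2, records
   "a" and "b" in UNPROM[0] and UNPROM[1], and sets *COMMON_TYPE to
   "short".  */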
628 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
629 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
631 static tree
632 vect_recog_temp_ssa_var (tree type, gimple *stmt)
634 return make_temp_ssa_name (type, stmt, "patt");
637 /* STMT2_INFO describes a type conversion that could be split into STMT1
638 followed by a version of STMT2_INFO that takes NEW_RHS as its first
639 input. Try to do this using pattern statements, returning true on
640 success. */
642 static bool
643 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
644 gimple *stmt1, tree vectype)
646 if (is_pattern_stmt_p (stmt2_info))
648 /* STMT2_INFO is part of a pattern. Get the statement to which
649 the pattern is attached. */
650 stmt_vec_info orig_stmt2_info
651 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt2_info));
652 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location,
657 "Splitting pattern statement: ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
661 /* Since STMT2_INFO is a pattern statement, we can change it
662 in-situ without worrying about changing the code for the
663 containing block. */
664 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
666 if (dump_enabled_p ())
668 dump_printf_loc (MSG_NOTE, vect_location, "into: ");
669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
670 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
674 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
675 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info->stmt)
676 /* STMT2_INFO is the actual pattern statement. Add STMT1
677 to the end of the definition sequence. */
678 gimple_seq_add_stmt_without_update (def_seq, stmt1);
679 else
681 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
682 before it. */
683 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
684 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
686 return true;
688 else
690 /* STMT2_INFO doesn't yet have a pattern. Try to create a
691 two-statement pattern now. */
692 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
693 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
694 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
695 if (!lhs_vectype)
696 return false;
698 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE, vect_location,
701 "Splitting statement: ");
702 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
705 /* Add STMT1 as a singleton pattern definition sequence. */
706 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
707 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
708 gimple_seq_add_stmt_without_update (def_seq, stmt1);
710 /* Build the second of the two pattern statements. */
711 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
712 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
713 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
715 if (dump_enabled_p ())
717 dump_printf_loc (MSG_NOTE, vect_location,
718 "into pattern statements: ");
719 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
720 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
721 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0);
724 return true;
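/* Illustrative use (hypothetical statements): if STMT2_INFO is

     res = (int) c;              // c has type "unsigned char"

   and the caller needs the value of c as an "unsigned short", it can
   build STMT1 as

     patt_tmp = (unsigned short) c;

   and call this function with NEW_RHS == patt_tmp, after which the
   pattern form of STMT2_INFO computes "res = (int) patt_tmp".  */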
728 /* Convert UNPROM to TYPE and return the result, adding new statements
729 to STMT_INFO's pattern definition statements if no better way is
730 available. VECTYPE is the vector form of TYPE. */
732 static tree
733 vect_convert_input (stmt_vec_info stmt_info, tree type,
734 vect_unpromoted_value *unprom, tree vectype)
736 /* Check for a no-op conversion. */
737 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
738 return unprom->op;
740 /* Allow the caller to create constant vect_unpromoted_values. */
741 if (TREE_CODE (unprom->op) == INTEGER_CST)
742 return wide_int_to_tree (type, wi::to_widest (unprom->op));
744 /* See if we can reuse an existing result. */
745 if (unprom->caster)
747 tree lhs = gimple_get_lhs (unprom->caster->stmt);
748 if (types_compatible_p (TREE_TYPE (lhs), type))
749 return lhs;
752 /* We need a new conversion statement. */
753 tree new_op = vect_recog_temp_ssa_var (type, NULL);
754 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
756 /* If the operation is the input to a vectorizable cast, try splitting
757 that cast into two, taking the required result as a mid-way point. */
758 if (unprom->caster)
760 tree lhs = gimple_get_lhs (unprom->caster->stmt);
761 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
762 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
763 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
764 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
765 return new_op;
768 /* If OP is an external value, see if we can insert the new statement
769 on an incoming edge. */
770 if (unprom->dt == vect_external_def)
771 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
773 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
774 gcc_assert (!new_bb);
775 return new_op;
778 /* As a (common) last resort, add the statement to the pattern itself. */
779 append_pattern_def_seq (stmt_info, new_stmt, vectype);
780 return new_op;
783 /* Invoke vect_convert_input for N elements of UNPROM and store the
784 result in the corresponding elements of RESULT. */
786 static void
787 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
788 tree *result, tree type, vect_unpromoted_value *unprom,
789 tree vectype)
791 for (unsigned int i = 0; i < n; ++i)
793 unsigned int j;
794 for (j = 0; j < i; ++j)
795 if (unprom[j].op == unprom[i].op)
796 break;
797 if (j < i)
798 result[i] = result[j];
799 else
800 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
804 /* The caller has created a (possibly empty) sequence of pattern definition
805 statements followed by a single statement PATTERN_STMT. Cast the result
806 of this final statement to TYPE. If a new statement is needed, add
807 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
808 and return the new statement, otherwise return PATTERN_STMT as-is.
809 VECITYPE is the vector form of PATTERN_STMT's result type. */
811 static gimple *
812 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
813 tree vecitype)
815 tree lhs = gimple_get_lhs (pattern_stmt);
816 if (!types_compatible_p (type, TREE_TYPE (lhs)))
818 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
819 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
820 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
822 return pattern_stmt;
825 /* Return true if STMT_VINFO describes a reduction for which reassociation
826 is allowed. If STMT_INFO is part of a group, assume that it's part of
827 a reduction chain and optimistically assume that all statements
828 except the last allow reassociation. */
830 static bool
831 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
833 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
834 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
835 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
838 /* As above, but also require it to have code CODE and to be a reduction
839 in the outermost loop. When returning true, store the operands in
840 *OP0_OUT and *OP1_OUT. */
842 static bool
843 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
844 tree *op0_out, tree *op1_out)
846 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
847 if (!loop_info)
848 return false;
850 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
851 if (!assign || gimple_assign_rhs_code (assign) != code)
852 return false;
854 /* We don't allow changing the order of the computation in the inner-loop
855 when doing outer-loop vectorization. */
856 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
857 if (loop && nested_in_vect_loop_p (loop, assign))
858 return false;
860 if (!vect_reassociating_reduction_p (stmt_info))
861 return false;
863 *op0_out = gimple_assign_rhs1 (assign);
864 *op1_out = gimple_assign_rhs2 (assign);
865 return true;
868 /* Function vect_recog_dot_prod_pattern
870 Try to find the following pattern:
872 type x_t, y_t;
873 TYPE1 prod;
874 TYPE2 sum = init;
875 loop:
876 sum_0 = phi <init, sum_1>
877 S1 x_t = ...
878 S2 y_t = ...
879 S3 x_T = (TYPE1) x_t;
880 S4 y_T = (TYPE1) y_t;
881 S5 prod = x_T * y_T;
882 [S6 prod = (TYPE2) prod; #optional]
883 S7 sum_1 = prod + sum_0;
885 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
886 same size as 'TYPE1' or bigger. This is a special case of a reduction
887 computation.
889 Input:
891 * STMTS: Contains a stmt from which the pattern search begins. In the
892 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
893 will be detected.
895 Output:
897 * TYPE_OUT: The type of the output of this pattern.
899 * Return value: A new stmt that will be used to replace the sequence of
900 stmts that constitute the pattern. In this case it will be:
901 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
903 Note: The dot-prod idiom is a widening reduction pattern that is
904 vectorized without preserving all the intermediate results. It
905 produces only N/2 (widened) results (by summing up pairs of
906 intermediate results) rather than all N results. Therefore, we
907 cannot allow this pattern when we want to get all the results and in
908 the correct order (as is the case when this computation is in an
909 inner-loop nested in an outer-loop that is being vectorized). */
911 static gimple *
912 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_out)
914 gimple *last_stmt = (*stmts)[0];
915 tree oprnd0, oprnd1;
916 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
917 vec_info *vinfo = stmt_vinfo->vinfo;
918 tree type, half_type;
919 gimple *pattern_stmt;
920 tree var;
922 /* Look for the following pattern
923 DX = (TYPE1) X;
924 DY = (TYPE1) Y;
925 DPROD = DX * DY;
926 DDPROD = (TYPE2) DPROD;
927 sum_1 = DDPROD + sum_0;
928 In which
929 - DX is double the size of X
930 - DY is double the size of Y
931 - DX, DY, DPROD all have the same type
932 - sum is the same size as DPROD or bigger
933 - sum has been recognized as a reduction variable.
935 This is equivalent to:
936 DPROD = X w* Y; #widen mult
937 sum_1 = DPROD w+ sum_0; #widen summation
938 or
939 DPROD = X w* Y; #widen mult
940 sum_1 = DPROD + sum_0; #summation
943 /* Starting from LAST_STMT, follow the defs of its uses in search
944 of the above pattern. */
946 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
947 &oprnd0, &oprnd1))
948 return NULL;
950 type = gimple_expr_type (last_stmt);
952 vect_unpromoted_value unprom_mult;
953 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
955 /* So far so good. Since last_stmt was detected as a (summation) reduction,
956 we know that oprnd1 is the reduction variable (defined by a loop-header
957 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
958 Left to check that oprnd0 is defined by a (widen_)mult_expr */
959 if (!oprnd0)
960 return NULL;
962 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
963 if (!mult_vinfo)
964 return NULL;
966 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
967 inside the loop (in case we are analyzing an outer-loop). */
968 vect_unpromoted_value unprom0[2];
969 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
970 false, 2, unprom0, &half_type))
971 return NULL;
973 /* If there are two widening operations, make sure they agree on
974 the sign of the extension. */
975 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
976 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
977 return NULL;
979 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
981 tree half_vectype;
982 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
983 type_out, &half_vectype))
984 return NULL;
986 /* Get the inputs in the appropriate types. */
987 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
988 tree mult_oprnd[2];
989 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
990 unprom0, half_vectype);
992 var = vect_recog_temp_ssa_var (type, NULL);
993 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
994 mult_oprnd[0], mult_oprnd[1], oprnd1);
996 return pattern_stmt;
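/* Minimal source-level example of the dot-product pattern (illustrative,
   assuming 16-bit "short" and 32-bit "int"):

     int
     dot (short *x, short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
         sum += x[i] * y[i];
       return sum;
     }

   The multiplication is performed on values promoted from "short", so
   the loop body is rewritten as DOT_PROD_EXPR <x[i], y[i], sum_0>.  */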
1000 /* Function vect_recog_sad_pattern
1002 Try to find the following Sum of Absolute Difference (SAD) pattern:
1004 type x_t, y_t;
1005 signed TYPE1 diff, abs_diff;
1006 TYPE2 sum = init;
1007 loop:
1008 sum_0 = phi <init, sum_1>
1009 S1 x_t = ...
1010 S2 y_t = ...
1011 S3 x_T = (TYPE1) x_t;
1012 S4 y_T = (TYPE1) y_t;
1013 S5 diff = x_T - y_T;
1014 S6 abs_diff = ABS_EXPR <diff>;
1015 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1016 S8 sum_1 = abs_diff + sum_0;
1018 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1019 same size as 'TYPE1' or bigger. This is a special case of a reduction
1020 computation.
1022 Input:
1024 * STMTS: Contains a stmt from which the pattern search begins. In the
1025 example, when this function is called with S8, the pattern
1026 {S3,S4,S5,S6,S7,S8} will be detected.
1028 Output:
1030 * TYPE_OUT: The type of the output of this pattern.
1032 * Return value: A new stmt that will be used to replace the sequence of
1033 stmts that constitute the pattern. In this case it will be:
1034 SAD_EXPR <x_t, y_t, sum_0>
1037 static gimple *
1038 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_out)
1040 gimple *last_stmt = (*stmts)[0];
1041 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1042 vec_info *vinfo = stmt_vinfo->vinfo;
1043 tree half_type;
1045 /* Look for the following pattern
1046 DX = (TYPE1) X;
1047 DY = (TYPE1) Y;
1048 DDIFF = DX - DY;
1049 DAD = ABS_EXPR <DDIFF>;
1050 DAD = (TYPE2) DAD;
1051 sum_1 = DAD + sum_0;
1052 In which
1053 - DX is at least double the size of X
1054 - DY is at least double the size of Y
1055 - DX, DY, DDIFF, DAD all have the same type
1056 - sum is the same size as DAD or bigger
1057 - sum has been recognized as a reduction variable.
1059 This is equivalent to:
1060 DDIFF = X w- Y; #widen sub
1061 DAD = ABS_EXPR <DDIFF>;
1062 sum_1 = DAD w+ sum_0; #widen summation
1063 or
1064 DDIFF = X w- Y; #widen sub
1065 DAD = ABS_EXPR <DDIFF>;
1066 sum_1 = DAD + sum_0; #summation
1069 /* Starting from LAST_STMT, follow the defs of its uses in search
1070 of the above pattern. */
1072 tree plus_oprnd0, plus_oprnd1;
1073 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1074 &plus_oprnd0, &plus_oprnd1))
1075 return NULL;
1077 tree sum_type = gimple_expr_type (last_stmt);
1079 /* Any non-truncating sequence of conversions is OK here, since
1080 with a successful match, the result of the ABS(U) is known to fit
1081 within the nonnegative range of the result type. (It cannot be the
1082 negative of the minimum signed value due to the range of the widening
1083 MINUS_EXPR.) */
1084 vect_unpromoted_value unprom_abs;
1085 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1086 &unprom_abs);
1088 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1089 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1090 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1091 Then check that plus_oprnd0 is defined by an abs_expr. */
1093 if (!plus_oprnd0)
1094 return NULL;
1096 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1097 if (!abs_stmt_vinfo)
1098 return NULL;
1100 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1101 inside the loop (in case we are analyzing an outer-loop). */
1102 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1103 if (!abs_stmt
1104 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1105 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1106 return NULL;
1108 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1109 tree abs_type = TREE_TYPE (abs_oprnd);
1110 if (TYPE_UNSIGNED (abs_type))
1111 return NULL;
1113 /* Peel off conversions from the ABS input. This can involve sign
1114 changes (e.g. from an unsigned subtraction to a signed ABS input)
1115 or signed promotion, but it can't include unsigned promotion.
1116 (Note that ABS of an unsigned promotion should have been folded
1117 away before now anyway.) */
1118 vect_unpromoted_value unprom_diff;
1119 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1120 &unprom_diff);
1121 if (!abs_oprnd)
1122 return NULL;
1123 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1124 && TYPE_UNSIGNED (unprom_diff.type))
1125 return NULL;
1127 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1128 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1129 if (!diff_stmt_vinfo)
1130 return NULL;
1132 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1133 inside the loop (in case we are analyzing an outer-loop). */
1134 vect_unpromoted_value unprom[2];
1135 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1136 false, 2, unprom, &half_type))
1137 return NULL;
1139 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1141 tree half_vectype;
1142 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1143 type_out, &half_vectype))
1144 return NULL;
1146 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1147 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1148 tree sad_oprnd[2];
1149 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1150 unprom, half_vectype);
1152 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1153 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1154 sad_oprnd[1], plus_oprnd1);
1156 return pattern_stmt;
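/* Minimal source-level example of the SAD pattern (illustrative, assuming
   8-bit "unsigned char" and 32-bit "int"; abs is from <stdlib.h>):

     int
     sad (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
         sum += abs (x[i] - y[i]);
       return sum;
     }

   The subtraction happens on values promoted to "int", the abs matches
   S6 and the accumulation matches S8, so the body becomes
   SAD_EXPR <x[i], y[i], sum_0>.  */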
1159 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1160 so that it can be treated as though it had the form:
1162 A_TYPE a;
1163 B_TYPE b;
1164 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1165 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1166 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1167 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1168 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1170 Try to replace the pattern with:
1172 A_TYPE a;
1173 B_TYPE b;
1174 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1175 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1176 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1177 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1179 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1181 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1182 name of the pattern being matched, for dump purposes. */
1184 static gimple *
1185 vect_recog_widen_op_pattern (vec<gimple *> *stmts, tree *type_out,
1186 tree_code orig_code, tree_code wide_code,
1187 bool shift_p, const char *name)
1189 gimple *last_stmt = stmts->pop ();
1190 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
1192 vect_unpromoted_value unprom[2];
1193 tree half_type;
1194 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1195 shift_p, 2, unprom, &half_type))
1196 return NULL;
1198 /* Pattern detected. */
1199 vect_pattern_detected (name, last_stmt);
1201 tree type = gimple_expr_type (last_stmt);
1202 tree itype = type;
1203 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1204 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1205 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1206 TYPE_UNSIGNED (half_type));
1208 /* Check target support */
1209 tree vectype = get_vectype_for_scalar_type (half_type);
1210 tree vecitype = get_vectype_for_scalar_type (itype);
1211 enum tree_code dummy_code;
1212 int dummy_int;
1213 auto_vec<tree> dummy_vec;
1214 if (!vectype
1215 || !vecitype
1216 || !supportable_widening_operation (wide_code, last_stmt,
1217 vecitype, vectype,
1218 &dummy_code, &dummy_code,
1219 &dummy_int, &dummy_vec))
1220 return NULL;
1222 *type_out = get_vectype_for_scalar_type (type);
1223 if (!*type_out)
1224 return NULL;
1226 STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info) = NULL;
1227 tree oprnd[2];
1228 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1230 tree var = vect_recog_temp_ssa_var (itype, NULL);
1231 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1232 oprnd[0], oprnd[1]);
1234 stmts->safe_push (last_stmt);
1235 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1238 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1239 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1241 static gimple *
1242 vect_recog_widen_mult_pattern (vec<gimple *> *stmts, tree *type_out)
1244 return vect_recog_widen_op_pattern (stmts, type_out, MULT_EXPR,
1245 WIDEN_MULT_EXPR, false,
1246 "vect_recog_widen_mult_pattern");
1249 /* Function vect_recog_pow_pattern
1251 Try to find the following pattern:
1253 x = POW (y, N);
1255 with POW being one of pow, powf, powi, powif and N being
1256 either 2 or 0.5.
1258 Input:
1260 * LAST_STMT: A stmt from which the pattern search begins.
1262 Output:
1264 * TYPE_OUT: The type of the output of this pattern.
1266 * Return value: A new stmt that will be used to replace the sequence of
1267 stmts that constitute the pattern. In this case it will be:
1268 x = x * x
1269 or
1270 x = sqrt (x)
1273 static gimple *
1274 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_out)
1276 gimple *last_stmt = (*stmts)[0];
1277 tree base, exp;
1278 gimple *stmt;
1279 tree var;
1281 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1282 return NULL;
1284 switch (gimple_call_combined_fn (last_stmt))
1286 CASE_CFN_POW:
1287 CASE_CFN_POWI:
1288 break;
1290 default:
1291 return NULL;
1294 base = gimple_call_arg (last_stmt, 0);
1295 exp = gimple_call_arg (last_stmt, 1);
1296 if (TREE_CODE (exp) != REAL_CST
1297 && TREE_CODE (exp) != INTEGER_CST)
1299 if (flag_unsafe_math_optimizations
1300 && TREE_CODE (base) == REAL_CST
1301 && !gimple_call_internal_p (last_stmt))
1303 combined_fn log_cfn;
1304 built_in_function exp_bfn;
1305 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1307 case BUILT_IN_POW:
1308 log_cfn = CFN_BUILT_IN_LOG;
1309 exp_bfn = BUILT_IN_EXP;
1310 break;
1311 case BUILT_IN_POWF:
1312 log_cfn = CFN_BUILT_IN_LOGF;
1313 exp_bfn = BUILT_IN_EXPF;
1314 break;
1315 case BUILT_IN_POWL:
1316 log_cfn = CFN_BUILT_IN_LOGL;
1317 exp_bfn = BUILT_IN_EXPL;
1318 break;
1319 default:
1320 return NULL;
1322 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1323 tree exp_decl = builtin_decl_implicit (exp_bfn);
1324 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1325 does that, but if C is a power of 2, we want to use
1326 exp2 (log2 (C) * x) in the non-vectorized version, but for
1327 vectorization we don't have vectorized exp2. */
1328 if (logc
1329 && TREE_CODE (logc) == REAL_CST
1330 && exp_decl
1331 && lookup_attribute ("omp declare simd",
1332 DECL_ATTRIBUTES (exp_decl)))
1334 cgraph_node *node = cgraph_node::get_create (exp_decl);
1335 if (node->simd_clones == NULL)
1337 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1338 || node->definition)
1339 return NULL;
1340 expand_simd_clones (node);
1341 if (node->simd_clones == NULL)
1342 return NULL;
1344 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1345 if (!*type_out)
1346 return NULL;
1347 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1348 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1349 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1350 new_pattern_def_seq (stmt_vinfo, g);
1351 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1352 g = gimple_build_call (exp_decl, 1, def);
1353 gimple_call_set_lhs (g, res);
1354 return g;
1358 return NULL;
1361 /* We now have a pow or powi builtin function call with a constant
1362 exponent. */
1364 /* Catch squaring. */
1365 if ((tree_fits_shwi_p (exp)
1366 && tree_to_shwi (exp) == 2)
1367 || (TREE_CODE (exp) == REAL_CST
1368 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1370 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1371 TREE_TYPE (base), type_out))
1372 return NULL;
1374 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1375 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1376 return stmt;
1379 /* Catch square root. */
1380 if (TREE_CODE (exp) == REAL_CST
1381 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1383 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1384 if (*type_out
1385 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1386 OPTIMIZE_FOR_SPEED))
1388 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1389 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1390 gimple_call_set_lhs (stmt, var);
1391 gimple_call_set_nothrow (stmt, true);
1392 return stmt;
1396 return NULL;
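/* Illustrative results (hypothetical inputs): "y = pow (x, 2.0)" becomes
   "y = x * x", and "y = pow (x, 0.5)" becomes an IFN_SQRT call when the
   target supports vector square root; a constant base such as
   "pow (2.5, x)" is instead handled via the exp (log (C) * x) route above
   when -funsafe-math-optimizations is in effect.  */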
1400 /* Function vect_recog_widen_sum_pattern
1402 Try to find the following pattern:
1404 type x_t;
1405 TYPE x_T, sum = init;
1406 loop:
1407 sum_0 = phi <init, sum_1>
1408 S1 x_t = *p;
1409 S2 x_T = (TYPE) x_t;
1410 S3 sum_1 = x_T + sum_0;
1412 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1413 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1414 a special case of a reduction computation.
1416 Input:
1418 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1419 when this function is called with S3, the pattern {S2,S3} will be detected.
1421 Output:
1423 * TYPE_OUT: The type of the output of this pattern.
1425 * Return value: A new stmt that will be used to replace the sequence of
1426 stmts that constitute the pattern. In this case it will be:
1427 WIDEN_SUM <x_t, sum_0>
1429 Note: The widening-sum idiom is a widening reduction pattern that is
1430 vectorized without preserving all the intermediate results. It
1431 produces only N/2 (widened) results (by summing up pairs of
1432 intermediate results) rather than all N results. Therefore, we
1433 cannot allow this pattern when we want to get all the results and in
1434 the correct order (as is the case when this computation is in an
1435 inner-loop nested in an outer-loop that is being vectorized). */
1437 static gimple *
1438 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_out)
1440 gimple *last_stmt = (*stmts)[0];
1441 tree oprnd0, oprnd1;
1442 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1443 vec_info *vinfo = stmt_vinfo->vinfo;
1444 tree type;
1445 gimple *pattern_stmt;
1446 tree var;
1448 /* Look for the following pattern
1449 DX = (TYPE) X;
1450 sum_1 = DX + sum_0;
1451 In which DX is at least double the size of X, and sum_1 has been
1452 recognized as a reduction variable.
1455 /* Starting from LAST_STMT, follow the defs of its uses in search
1456 of the above pattern. */
1458 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1459 &oprnd0, &oprnd1))
1460 return NULL;
1462 type = gimple_expr_type (last_stmt);
1464 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1465 we know that oprnd1 is the reduction variable (defined by a loop-header
1466 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1467 Left to check that oprnd0 is defined by a cast from type 'type' to type
1468 'TYPE'. */
1470 vect_unpromoted_value unprom0;
1471 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1472 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1473 return NULL;
1475 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1477 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1478 type_out))
1479 return NULL;
1481 var = vect_recog_temp_ssa_var (type, NULL);
1482 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1484 return pattern_stmt;
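/* Minimal source-level example (illustrative, assuming 16-bit "short" and
   32-bit "int"):

     int
     total (short *x, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
         sum += x[i];
       return sum;
     }

   x[i] is x_t, its implicit promotion to "int" is S2, and the
   accumulation is rewritten as WIDEN_SUM_EXPR <x[i], sum_0>.  */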
1487 /* Recognize cases in which an operation is performed in one type WTYPE
1488 but could be done more efficiently in a narrower type NTYPE. For example,
1489 if we have:
1491 ATYPE a; // narrower than NTYPE
1492 BTYPE b; // narrower than NTYPE
1493 WTYPE aw = (WTYPE) a;
1494 WTYPE bw = (WTYPE) b;
1495 WTYPE res = aw + bw; // only uses of aw and bw
1497 then it would be more efficient to do:
1499 NTYPE an = (NTYPE) a;
1500 NTYPE bn = (NTYPE) b;
1501 NTYPE resn = an + bn;
1502 WTYPE res = (WTYPE) resn;
1504 Other situations include things like:
1506 ATYPE a; // NTYPE or narrower
1507 WTYPE aw = (WTYPE) a;
1508 WTYPE res = aw + b;
1510 when only "(NTYPE) res" is significant. In that case it's more efficient
1511 to truncate "b" and do the operation on NTYPE instead:
1513 NTYPE an = (NTYPE) a;
1514 NTYPE bn = (NTYPE) b; // truncation
1515 NTYPE resn = an + bn;
1516 WTYPE res = (WTYPE) resn;
1518 All users of "res" should then use "resn" instead, making the final
1519 statement dead (not marked as relevant). The final statement is still
1520 needed to maintain the type correctness of the IR.
1522 vect_determine_precisions has already determined the minimum
1523 precision of the operation and the minimum precision required
1524 by users of the result. */
1526 static gimple *
1527 vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
1529 gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
1530 if (!last_stmt)
1531 return NULL;
1533 /* See whether we have found that this operation can be done on a
1534 narrower type without changing its semantics. */
1535 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
1536 unsigned int new_precision = last_stmt_info->operation_precision;
1537 if (!new_precision)
1538 return NULL;
1540 vec_info *vinfo = last_stmt_info->vinfo;
1541 tree lhs = gimple_assign_lhs (last_stmt);
1542 tree type = TREE_TYPE (lhs);
1543 tree_code code = gimple_assign_rhs_code (last_stmt);
1545 /* Keep the first operand of a COND_EXPR as-is: only the other two
1546 operands are interesting. */
1547 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1549 /* Check the operands. */
1550 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1551 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1552 unprom.quick_grow (nops);
1553 unsigned int min_precision = 0;
1554 bool single_use_p = false;
1555 for (unsigned int i = 0; i < nops; ++i)
1557 tree op = gimple_op (last_stmt, first_op + i);
1558 if (TREE_CODE (op) == INTEGER_CST)
1559 unprom[i].set_op (op, vect_constant_def);
1560 else if (TREE_CODE (op) == SSA_NAME)
1562 bool op_single_use_p = true;
1563 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1564 &op_single_use_p))
1565 return NULL;
1566 /* If:
1568 (1) N bits of the result are needed;
1569 (2) all inputs are widened from M<N bits; and
1570 (3) one operand OP is a single-use SSA name
1572 we can shift the M->N widening from OP to the output
1573 without changing the number or type of extensions involved.
1574 This then reduces the number of copies of STMT_INFO.
1576 If instead of (3) more than one operand is a single-use SSA name,
1577 shifting the extension to the output is even more of a win.
1579 If instead:
1581 (1) N bits of the result are needed;
1582 (2) one operand OP2 is widened from M2<N bits;
1583 (3) another operand OP1 is widened from M1<M2 bits; and
1584 (4) both OP1 and OP2 are single-use
1586 the choice is between:
1588 (a) truncating OP2 to M1, doing the operation on M1,
1589 and then widening the result to N
1591 (b) widening OP1 to M2, doing the operation on M2, and then
1592 widening the result to N
1594 Both shift the M2->N widening of the inputs to the output.
1595 (a) additionally shifts the M1->M2 widening to the output;
1596 it requires fewer copies of STMT_INFO but requires an extra
1597 M2->M1 truncation.
1599 Which is better will depend on the complexity and cost of
1600 STMT_INFO, which is hard to predict at this stage. However,
1601 a clear tie-breaker in favor of (b) is the fact that the
1602 truncation in (a) increases the length of the operation chain.
1604 If instead of (4) only one of OP1 or OP2 is single-use,
1605 (b) is still a win over doing the operation in N bits:
1606 it still shifts the M2->N widening on the single-use operand
1607 to the output and reduces the number of STMT_INFO copies.
1609 If neither operand is single-use then operating on fewer than
1610 N bits might lead to more extensions overall. Whether it does
1611 or not depends on global information about the vectorization
1612 region, and whether that's a good trade-off would again
1613 depend on the complexity and cost of the statements involved,
1614 as well as things like register pressure that are not normally
1615 modelled at this stage. We therefore ignore these cases
1616 and just optimize the clear single-use wins above.
1618 Thus we take the maximum precision of the unpromoted operands
1619 and record whether any operand is single-use. */
1620 if (unprom[i].dt == vect_internal_def)
1622 min_precision = MAX (min_precision,
1623 TYPE_PRECISION (unprom[i].type));
1624 single_use_p |= op_single_use_p;
1629 /* Although the operation could be done in operation_precision, we have
1630 to balance that against introducing extra truncations or extensions.
1631 Calculate the minimum precision that can be handled efficiently.
1633 The loop above determined that the operation could be handled
1634 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1635 extension from the inputs to the output without introducing more
1636 instructions, and would reduce the number of instructions required
1637 for STMT_INFO itself.
1639 vect_determine_precisions has also determined that the result only
1640 needs min_output_precision bits. Truncating by a factor of N times
1641 requires a tree of N - 1 instructions, so if TYPE is N times wider
1642 than min_output_precision, doing the operation in TYPE and truncating
1643 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1644 In contrast:
1646 - truncating the input to a unary operation and doing the operation
1647 in the new type requires at most N - 1 + 1 = N instructions per
1648 output vector
1650 - doing the same for a binary operation requires at most
1651 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1653 Both unary and binary operations require fewer instructions than
1654 this if the operands were extended from a suitable truncated form.
1655 Thus there is usually nothing to lose by doing operations in
1656 min_output_precision bits, but there can be something to gain. */
1657 if (!single_use_p)
1658 min_precision = last_stmt_info->min_output_precision;
1659 else
1660 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1662 /* Apply the minimum efficient precision we just calculated. */
1663 if (new_precision < min_precision)
1664 new_precision = min_precision;
1665 if (new_precision >= TYPE_PRECISION (type))
1666 return NULL;
1668 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1670 *type_out = get_vectype_for_scalar_type (type);
1671 if (!*type_out)
1672 return NULL;
1674 /* We've found a viable pattern. Get the new type of the operation. */
1675 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1676 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1678 /* We specifically don't check here whether the target supports the
1679 new operation, since it might be something that a later pattern
1680 wants to rewrite anyway. If targets have a minimum element size
1681 for some optabs, we should pattern-match smaller ops to larger ops
1682 where beneficial. */
1683 tree new_vectype = get_vectype_for_scalar_type (new_type);
1684 if (!new_vectype)
1685 return NULL;
1687 if (dump_enabled_p ())
1689 dump_printf_loc (MSG_NOTE, vect_location, "demoting ");
1690 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
1691 dump_printf (MSG_NOTE, " to ");
1692 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_type);
1693 dump_printf (MSG_NOTE, "\n");
1696 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1697 STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info) = NULL;
1698 tree ops[3] = {};
1699 for (unsigned int i = 1; i < first_op; ++i)
1700 ops[i - 1] = gimple_op (last_stmt, i);
1701 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1702 new_type, &unprom[0], new_vectype);
1704 /* Use the operation to produce a result of type NEW_TYPE. */
1705 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1706 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1707 ops[0], ops[1], ops[2]);
1708 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1710 if (dump_enabled_p ())
1712 dump_printf_loc (MSG_NOTE, vect_location,
1713 "created pattern stmt: ");
1714 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1717 pattern_stmt = vect_convert_output (last_stmt_info, type,
1718 pattern_stmt, new_vectype);
1720 stmts->safe_push (last_stmt);
1721 return pattern_stmt;
1724 /* Recognize cases in which the input to a cast is wider than its
1725 output, and the input is fed by a widening operation. Fold this
1726 by removing the unnecessary intermediate widening. E.g.:
1728 unsigned char a;
1729 unsigned int b = (unsigned int) a;
1730 unsigned short c = (unsigned short) b;
1734 unsigned short c = (unsigned short) a;
1736 Although this is rare in input IR, it is an expected side-effect
1737 of the over-widening pattern above.
1739 This is also beneficial for integer-to-float conversions, if the
1740 widened integer has more bits than the float, and if the unwidened
1741 input doesn't. */
1743 static gimple *
1744 vect_recog_cast_forwprop_pattern (vec<gimple *> *stmts, tree *type_out)
1746 /* Check for a cast, including an integer-to-float conversion. */
1747 gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
1748 if (!last_stmt)
1749 return NULL;
1750 tree_code code = gimple_assign_rhs_code (last_stmt);
1751 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1752 return NULL;
1754 /* Make sure that the lhs is a scalar with a natural bitsize. */
1755 tree lhs = gimple_assign_lhs (last_stmt);
1756 if (!lhs)
1757 return NULL;
1758 tree lhs_type = TREE_TYPE (lhs);
1759 scalar_mode lhs_mode;
1760 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1761 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1762 return NULL;
1764 /* Check for a narrowing operation (from a vector point of view). */
1765 tree rhs = gimple_assign_rhs1 (last_stmt);
1766 tree rhs_type = TREE_TYPE (rhs);
1767 if (!INTEGRAL_TYPE_P (rhs_type)
1768 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1769 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1770 return NULL;
1772 /* Try to find an unpromoted input. */
1773 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
1774 vec_info *vinfo = last_stmt_info->vinfo;
1775 vect_unpromoted_value unprom;
1776 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1777 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1778 return NULL;
1780 /* If the bits above RHS_TYPE matter, make sure that they're the
1781 same when extending from UNPROM as they are when extending from RHS. */
1782 if (!INTEGRAL_TYPE_P (lhs_type)
1783 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1784 return NULL;
1786 /* We can get the same result by casting UNPROM directly, to avoid
1787 the unnecessary widening and narrowing. */
1788 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1790 *type_out = get_vectype_for_scalar_type (lhs_type);
1791 if (!*type_out)
1792 return NULL;
1794 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1795 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1796 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1798 stmts->safe_push (last_stmt);
1799 return pattern_stmt;
1802 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1803 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1805 static gimple *
1806 vect_recog_widen_shift_pattern (vec<gimple *> *stmts, tree *type_out)
1808 return vect_recog_widen_op_pattern (stmts, type_out, LSHIFT_EXPR,
1809 WIDEN_LSHIFT_EXPR, true,
1810 "vect_recog_widen_shift_pattern");
1813 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1815 type a_t, b_t, c_t;
1817 S0 a_t = b_t r<< c_t;
1819 Input/Output:
1821 * STMTS: Contains a stmt from which the pattern search begins,
1822 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1823 with a sequence:
1825 S1 d_t = -c_t;
1826 S2 e_t = d_t & (B - 1);
1827 S3 f_t = b_t << c_t;
1828 S4 g_t = b_t >> e_t;
1829 S0 a_t = f_t | g_t;
1831 where B is the element bitsize of the type.
1833 Output:
1835 * TYPE_OUT: The type of the output of this pattern.
1837 * Return value: A new stmt that will be used to replace the rotate
1838 S0 stmt. */
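/* A minimal scalar sketch of the S1-S0 sequence above (illustrative only,
   not part of the vectorizer sources), assuming a left rotate, B == 32,
   an unsigned 32-bit element type and 0 <= c_t < 32:  */

static inline unsigned int
rotate_left_sketch (unsigned int b_t, unsigned int c_t)
{
  unsigned int d_t = -c_t;        /* S1 */
  unsigned int e_t = d_t & 31;    /* S2: B - 1 with B == 32 */
  unsigned int f_t = b_t << c_t;  /* S3 */
  unsigned int g_t = b_t >> e_t;  /* S4 */
  return f_t | g_t;               /* S0 */
}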
1840 static gimple *
1841 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_out)
1843 gimple *last_stmt = stmts->pop ();
1844 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1845 gimple *pattern_stmt, *def_stmt;
1846 enum tree_code rhs_code;
1847 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1848 vec_info *vinfo = stmt_vinfo->vinfo;
1849 enum vect_def_type dt;
1850 optab optab1, optab2;
1851 edge ext_def = NULL;
1853 if (!is_gimple_assign (last_stmt))
1854 return NULL;
1856 rhs_code = gimple_assign_rhs_code (last_stmt);
1857 switch (rhs_code)
1859 case LROTATE_EXPR:
1860 case RROTATE_EXPR:
1861 break;
1862 default:
1863 return NULL;
1866 lhs = gimple_assign_lhs (last_stmt);
1867 oprnd0 = gimple_assign_rhs1 (last_stmt);
1868 type = TREE_TYPE (oprnd0);
1869 oprnd1 = gimple_assign_rhs2 (last_stmt);
1870 if (TREE_CODE (oprnd0) != SSA_NAME
1871 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1872 || !INTEGRAL_TYPE_P (type)
1873 || !TYPE_UNSIGNED (type))
1874 return NULL;
1876 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt))
1877 return NULL;
1879 if (dt != vect_internal_def
1880 && dt != vect_constant_def
1881 && dt != vect_external_def)
1882 return NULL;
1884 vectype = get_vectype_for_scalar_type (type);
1885 if (vectype == NULL_TREE)
1886 return NULL;
1888 /* If vector/vector or vector/scalar rotate is supported by the target,
1889 don't do anything here. */
1890 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1891 if (optab1
1892 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1893 return NULL;
1895 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1897 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1898 if (optab2
1899 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1900 return NULL;
1903 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1904 don't do anything here either. */
1905 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1906 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1907 if (!optab1
1908 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1909 || !optab2
1910 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1912 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1913 return NULL;
1914 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1915 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1916 if (!optab1
1917 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1918 || !optab2
1919 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1920 return NULL;
1923 *type_out = vectype;
1925 if (dt == vect_external_def
1926 && TREE_CODE (oprnd1) == SSA_NAME)
1927 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
1929 def = NULL_TREE;
1930 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1931 if (TREE_CODE (oprnd1) == INTEGER_CST
1932 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1933 def = oprnd1;
1934 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1936 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1937 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1938 && TYPE_PRECISION (TREE_TYPE (rhs1))
1939 == TYPE_PRECISION (type))
1940 def = rhs1;
1943 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1944 if (def == NULL_TREE)
1946 def = vect_recog_temp_ssa_var (type, NULL);
1947 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1948 if (ext_def)
1950 basic_block new_bb
1951 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1952 gcc_assert (!new_bb);
1954 else
1955 append_pattern_def_seq (stmt_vinfo, def_stmt);
1957 stype = TREE_TYPE (def);
1958 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1960 if (TREE_CODE (def) == INTEGER_CST)
1962 if (!tree_fits_uhwi_p (def)
1963 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1964 || integer_zerop (def))
1965 return NULL;
1966 def2 = build_int_cst (stype,
1967 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1969 else
1971 tree vecstype = get_vectype_for_scalar_type (stype);
1972 stmt_vec_info def_stmt_vinfo;
1974 if (vecstype == NULL_TREE)
1975 return NULL;
1976 def2 = vect_recog_temp_ssa_var (stype, NULL);
1977 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1978 if (ext_def)
1980 basic_block new_bb
1981 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1982 gcc_assert (!new_bb);
1984 else
1986 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1987 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1988 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1989 append_pattern_def_seq (stmt_vinfo, def_stmt);
1992 def2 = vect_recog_temp_ssa_var (stype, NULL);
1993 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
1994 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1995 gimple_assign_lhs (def_stmt), mask);
1996 if (ext_def)
1998 basic_block new_bb
1999 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2000 gcc_assert (!new_bb);
2002 else
2004 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2005 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2006 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2007 append_pattern_def_seq (stmt_vinfo, def_stmt);
2011 var1 = vect_recog_temp_ssa_var (type, NULL);
2012 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2013 ? LSHIFT_EXPR : RSHIFT_EXPR,
2014 oprnd0, def);
2015 append_pattern_def_seq (stmt_vinfo, def_stmt);
2017 var2 = vect_recog_temp_ssa_var (type, NULL);
2018 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2019 ? RSHIFT_EXPR : LSHIFT_EXPR,
2020 oprnd0, def2);
2021 append_pattern_def_seq (stmt_vinfo, def_stmt);
2023 /* Pattern detected. */
2024 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2026 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2027 var = vect_recog_temp_ssa_var (type, NULL);
2028 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2030 stmts->safe_push (last_stmt);
2031 return pattern_stmt;
2034 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2035 vectorized:
2037 type a_t;
2038 TYPE b_T, res_T;
2040 S1 a_t = ;
2041 S2 b_T = ;
2042 S3 res_T = b_T op a_t;
2044 where 'TYPE' is a type with a different size than 'type',
2045 and op is <<, >> or rotate.
2047 Also detect cases:
2049 type a_t;
2050 TYPE b_T, c_T, res_T;
2052 S0 c_T = ;
2053 S1 a_t = (type) c_T;
2054 S2 b_T = ;
2055 S3 res_T = b_T op a_t;
2057 Input/Output:
2059 * STMTS: Contains a stmt from which the pattern search begins,
2060 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2061 with a shift/rotate which has the same type on both operands: in the
2062 second case just b_T op c_T, in the first case with an added cast
2063 from a_t to TYPE in STMT_VINFO_PATTERN_DEF_SEQ.
2065 Output:
2067 * TYPE_OUT: The type of the output of this pattern.
2069 * Return value: A new stmt that will be used to replace the shift/rotate
2070 S3 stmt. */
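/* For instance (illustrative only): with "int b_T; unsigned char a_t;",
   the statement "res_T = b_T << a_t" is rewritten as
   "tmp = (int) a_t; res_T = b_T << tmp", so that a single vector shift
   can be used with both operands in the same vector type.  */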
2072 static gimple *
2073 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts, tree *type_out)
2075 gimple *last_stmt = stmts->pop ();
2076 tree oprnd0, oprnd1, lhs, var;
2077 gimple *pattern_stmt;
2078 enum tree_code rhs_code;
2079 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2080 vec_info *vinfo = stmt_vinfo->vinfo;
2082 if (!is_gimple_assign (last_stmt))
2083 return NULL;
2085 rhs_code = gimple_assign_rhs_code (last_stmt);
2086 switch (rhs_code)
2088 case LSHIFT_EXPR:
2089 case RSHIFT_EXPR:
2090 case LROTATE_EXPR:
2091 case RROTATE_EXPR:
2092 break;
2093 default:
2094 return NULL;
2097 lhs = gimple_assign_lhs (last_stmt);
2098 oprnd0 = gimple_assign_rhs1 (last_stmt);
2099 oprnd1 = gimple_assign_rhs2 (last_stmt);
2100 if (TREE_CODE (oprnd0) != SSA_NAME
2101 || TREE_CODE (oprnd1) != SSA_NAME
2102 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2103 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2104 || TYPE_PRECISION (TREE_TYPE (lhs))
2105 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2106 return NULL;
2108 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2109 if (!def_vinfo)
2110 return NULL;
2112 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2113 if (*type_out == NULL_TREE)
2114 return NULL;
2116 tree def = NULL_TREE;
2117 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2118 if (def_stmt && gimple_assign_cast_p (def_stmt))
2120 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2121 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2122 && TYPE_PRECISION (TREE_TYPE (rhs1))
2123 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2125 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2126 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2127 def = rhs1;
2128 else
2130 tree mask
2131 = build_low_bits_mask (TREE_TYPE (rhs1),
2132 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2133 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2134 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2135 stmt_vec_info new_stmt_info
2136 = new_stmt_vec_info (def_stmt, vinfo);
2137 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2138 STMT_VINFO_VECTYPE (new_stmt_info)
2139 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2140 new_pattern_def_seq (stmt_vinfo, def_stmt);
2145 if (def == NULL_TREE)
2147 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2148 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2149 new_pattern_def_seq (stmt_vinfo, def_stmt);
2152 /* Pattern detected. */
2153 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2155 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2156 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2157 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2159 stmts->safe_push (last_stmt);
2160 return pattern_stmt;
2163 /* Return true iff the target has a vector optab implementing the operation
2164 CODE on type VECTYPE. */
2166 static bool
2167 target_has_vecop_for_code (tree_code code, tree vectype)
2169 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2170 return voptab
2171 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2174 /* Verify that the target has optabs of VECTYPE to perform all the steps
2175 needed by the multiplication-by-immediate synthesis algorithm described by
2176 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2177 present. Return true iff the target supports all the steps. */
2179 static bool
2180 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2181 tree vectype, bool synth_shift_p)
2183 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2184 return false;
2186 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2187 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2189 if (var == negate_variant
2190 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2191 return false;
2193 /* If we must synthesize shifts with additions make sure that vector
2194 addition is available. */
2195 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2196 return false;
2198 for (int i = 1; i < alg->ops; i++)
2200 switch (alg->op[i])
2202 case alg_shift:
2203 break;
2204 case alg_add_t_m2:
2205 case alg_add_t2_m:
2206 case alg_add_factor:
2207 if (!supports_vplus)
2208 return false;
2209 break;
2210 case alg_sub_t_m2:
2211 case alg_sub_t2_m:
2212 case alg_sub_factor:
2213 if (!supports_vminus)
2214 return false;
2215 break;
2216 case alg_unknown:
2217 case alg_m:
2218 case alg_zero:
2219 case alg_impossible:
2220 return false;
2221 default:
2222 gcc_unreachable ();
2226 return true;
2229 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2230 putting the final result in DEST. Append all statements but the last into
2231 VINFO. Return the last statement. */
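/* A scalar sketch of what the loop below emits for AMNT == 3 (illustrative
   only): each statement doubles the previous result, so a shift by AMNT
   becomes AMNT additions.  */

static inline unsigned int
lshift_by_3_sketch (unsigned int op)
{
  unsigned int t1 = op + op;  /* op << 1 */
  unsigned int t2 = t1 + t1;  /* op << 2 */
  return t2 + t2;             /* op << 3 */
}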
2233 static gimple *
2234 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2235 stmt_vec_info vinfo)
2237 HOST_WIDE_INT i;
2238 tree itype = TREE_TYPE (op);
2239 tree prev_res = op;
2240 gcc_assert (amnt >= 0);
2241 for (i = 0; i < amnt; i++)
2243 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2244 : dest;
2245 gimple *stmt
2246 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2247 prev_res = tmp_var;
2248 if (i < amnt - 1)
2249 append_pattern_def_seq (vinfo, stmt);
2250 else
2251 return stmt;
2253 gcc_unreachable ();
2254 return NULL;
2257 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2258 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2259 the process if necessary. Append the resulting assignment statements
2260 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2261 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2262 left shifts using additions. */
2264 static tree
2265 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2266 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2268 if (integer_zerop (op2)
2269 && (code == LSHIFT_EXPR
2270 || code == PLUS_EXPR))
2272 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2273 return op1;
2276 gimple *stmt;
2277 tree itype = TREE_TYPE (op1);
2278 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2280 if (code == LSHIFT_EXPR
2281 && synth_shift_p)
2283 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2284 stmt_vinfo);
2285 append_pattern_def_seq (stmt_vinfo, stmt);
2286 return tmp_var;
2289 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2290 append_pattern_def_seq (stmt_vinfo, stmt);
2291 return tmp_var;
2294 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2295 and simple arithmetic operations to be vectorized. Record the statements
2296 produced in STMT_VINFO and return the last statement in the sequence or
2297 NULL if it's not possible to synthesize such a multiplication.
2298 This function mirrors the behavior of expand_mult_const in expmed.c but
2299 works on tree-ssa form. */
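/* A scalar sketch of the kind of sequence this synthesizes (illustrative
   only), for VAL == 9, which can be expressed as a single shift followed
   by an add:  */

static inline unsigned int
mult_by_9_sketch (unsigned int op)
{
  unsigned int t = op << 3;  /* op * 8 */
  return t + op;             /* op * 9 */
}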
2301 static gimple *
2302 vect_synth_mult_by_constant (tree op, tree val,
2303 stmt_vec_info stmt_vinfo)
2305 tree itype = TREE_TYPE (op);
2306 machine_mode mode = TYPE_MODE (itype);
2307 struct algorithm alg;
2308 mult_variant variant;
2309 if (!tree_fits_shwi_p (val))
2310 return NULL;
2312 /* Multiplication synthesis by shifts, adds and subs can introduce
2313 signed overflow where the original operation didn't. Perform the
2314 operations on an unsigned type and cast back to avoid this.
2315 In the future we may want to relax this for synthesis algorithms
2316 that we can prove do not cause unexpected overflow. */
2317 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2319 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2321 /* Targets that don't support vector shifts but support vector additions
2322 can synthesize shifts that way. */
2323 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2325 HOST_WIDE_INT hwval = tree_to_shwi (val);
2326 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2327 The vectorizer's benefit analysis will decide whether it's beneficial
2328 to do this. */
2329 bool possible = choose_mult_variant (mode, hwval, &alg,
2330 &variant, MAX_COST);
2331 if (!possible)
2332 return NULL;
2334 tree vectype = get_vectype_for_scalar_type (multtype);
2336 if (!vectype
2337 || !target_supports_mult_synth_alg (&alg, variant,
2338 vectype, synth_shift_p))
2339 return NULL;
2341 tree accumulator;
2343 /* Clear out the sequence of statements so we can populate it below. */
2344 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2345 gimple *stmt = NULL;
2347 if (cast_to_unsigned_p)
2349 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2350 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2351 append_pattern_def_seq (stmt_vinfo, stmt);
2352 op = tmp_op;
2355 if (alg.op[0] == alg_zero)
2356 accumulator = build_int_cst (multtype, 0);
2357 else
2358 accumulator = op;
2360 bool needs_fixup = (variant == negate_variant)
2361 || (variant == add_variant);
2363 for (int i = 1; i < alg.ops; i++)
2365 tree shft_log = build_int_cst (multtype, alg.log[i]);
2366 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2367 tree tmp_var = NULL_TREE;
2369 switch (alg.op[i])
2371 case alg_shift:
2372 if (synth_shift_p)
2373 stmt
2374 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2375 stmt_vinfo);
2376 else
2377 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2378 shft_log);
2379 break;
2380 case alg_add_t_m2:
2381 tmp_var
2382 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2383 stmt_vinfo, synth_shift_p);
2384 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2385 tmp_var);
2386 break;
2387 case alg_sub_t_m2:
2388 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2389 shft_log, stmt_vinfo,
2390 synth_shift_p);
2391 /* In some algorithms the first step involves zeroing the
2392 accumulator. If subtracting from such an accumulator
2393 just emit the negation directly. */
2394 if (integer_zerop (accumulator))
2395 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2396 else
2397 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2398 tmp_var);
2399 break;
2400 case alg_add_t2_m:
2401 tmp_var
2402 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2403 stmt_vinfo, synth_shift_p);
2404 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2405 break;
2406 case alg_sub_t2_m:
2407 tmp_var
2408 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2409 stmt_vinfo, synth_shift_p);
2410 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2411 break;
2412 case alg_add_factor:
2413 tmp_var
2414 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2415 stmt_vinfo, synth_shift_p);
2416 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2417 tmp_var);
2418 break;
2419 case alg_sub_factor:
2420 tmp_var
2421 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2422 stmt_vinfo, synth_shift_p);
2423 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2424 accumulator);
2425 break;
2426 default:
2427 gcc_unreachable ();
2429 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2430 but rather return it directly. */
2432 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2433 append_pattern_def_seq (stmt_vinfo, stmt);
2434 accumulator = accum_tmp;
2436 if (variant == negate_variant)
2438 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2439 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2440 accumulator = accum_tmp;
2441 if (cast_to_unsigned_p)
2442 append_pattern_def_seq (stmt_vinfo, stmt);
2444 else if (variant == add_variant)
2446 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2447 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2448 accumulator = accum_tmp;
2449 if (cast_to_unsigned_p)
2450 append_pattern_def_seq (stmt_vinfo, stmt);
2452 /* Move back to the original signed type if needed. */
2453 if (cast_to_unsigned_p)
2455 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2456 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2459 return stmt;
2462 /* Detect multiplication by a constant and convert it into a sequence of
2463 shifts, additions, subtractions and negations.  We reuse the
2464 choose_mult_variant algorithms from expmed.c.
2466 Input/Output:
2468 STMTS: Contains a stmt from which the pattern search begins,
2469 i.e. the mult stmt.
2471 Output:
2473 * TYPE_OUT: The type of the output of this pattern.
2475 * Return value: A new stmt that will be used to replace
2476 the multiplication. */
2478 static gimple *
2479 vect_recog_mult_pattern (vec<gimple *> *stmts, tree *type_out)
2481 gimple *last_stmt = stmts->pop ();
2482 tree oprnd0, oprnd1, vectype, itype;
2483 gimple *pattern_stmt;
2484 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2486 if (!is_gimple_assign (last_stmt))
2487 return NULL;
2489 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2490 return NULL;
2492 oprnd0 = gimple_assign_rhs1 (last_stmt);
2493 oprnd1 = gimple_assign_rhs2 (last_stmt);
2494 itype = TREE_TYPE (oprnd0);
2496 if (TREE_CODE (oprnd0) != SSA_NAME
2497 || TREE_CODE (oprnd1) != INTEGER_CST
2498 || !INTEGRAL_TYPE_P (itype)
2499 || !type_has_mode_precision_p (itype))
2500 return NULL;
2502 vectype = get_vectype_for_scalar_type (itype);
2503 if (vectype == NULL_TREE)
2504 return NULL;
2506 /* If the target can handle vectorized multiplication natively,
2507 don't attempt to optimize this. */
2508 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2509 if (mul_optab != unknown_optab)
2511 machine_mode vec_mode = TYPE_MODE (vectype);
2512 int icode = (int) optab_handler (mul_optab, vec_mode);
2513 if (icode != CODE_FOR_nothing)
2514 return NULL;
2517 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2518 if (!pattern_stmt)
2519 return NULL;
2521 /* Pattern detected. */
2522 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2524 stmts->safe_push (last_stmt);
2525 *type_out = vectype;
2527 return pattern_stmt;
2530 /* Detect a signed division by a constant that wouldn't otherwise be
2531 vectorized:
2533 type a_t, b_t;
2535 S1 a_t = b_t / N;
2537 where type 'type' is an integral type and N is a constant.
2539 Similarly handle modulo by a constant:
2541 S4 a_t = b_t % N;
2543 Input/Output:
2545 * STMTS: Contains a stmt from which the pattern search begins,
2546 i.e. the division stmt.  S1 is replaced, if N is a power of
2547 two constant and the type is signed, by:
2548 S3 y_t = b_t < 0 ? N - 1 : 0;
2549 S2 x_t = b_t + y_t;
2550 S1' a_t = x_t >> log2 (N);
2552 S4 is replaced, if N is a power of two constant and the
2553 type is signed, by (where *_T temporaries have unsigned type):
2554 S9 y_T = b_t < 0 ? -1U : 0U;
2555 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2556 S7 z_t = (type) z_T;
2557 S6 w_t = b_t + z_t;
2558 S5 x_t = w_t & (N - 1);
2559 S4' a_t = x_t - z_t;
2561 Output:
2563 * TYPE_OUT: The type of the output of this pattern.
2565 * Return value: A new stmt that will be used to replace the division
2566 S1 or modulo S4 stmt. */
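/* A scalar sketch of the division case above (illustrative only), for a
   32-bit signed type and N == 4, assuming the right shift of a negative
   value is arithmetic (as the vectorized RSHIFT_EXPR is):  */

static inline int
sdiv_by_4_sketch (int b_t)
{
  int y_t = b_t < 0 ? 3 : 0;  /* S3: N - 1 for negative inputs */
  int x_t = b_t + y_t;        /* S2 */
  return x_t >> 2;            /* S1': shift by log2 (N) */
}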
2568 static gimple *
2569 vect_recog_divmod_pattern (vec<gimple *> *stmts, tree *type_out)
2571 gimple *last_stmt = stmts->pop ();
2572 tree oprnd0, oprnd1, vectype, itype, cond;
2573 gimple *pattern_stmt, *def_stmt;
2574 enum tree_code rhs_code;
2575 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2576 vec_info *vinfo = stmt_vinfo->vinfo;
2577 optab optab;
2578 tree q;
2579 int dummy_int, prec;
2580 stmt_vec_info def_stmt_vinfo;
2582 if (!is_gimple_assign (last_stmt))
2583 return NULL;
2585 rhs_code = gimple_assign_rhs_code (last_stmt);
2586 switch (rhs_code)
2588 case TRUNC_DIV_EXPR:
2589 case TRUNC_MOD_EXPR:
2590 break;
2591 default:
2592 return NULL;
2595 oprnd0 = gimple_assign_rhs1 (last_stmt);
2596 oprnd1 = gimple_assign_rhs2 (last_stmt);
2597 itype = TREE_TYPE (oprnd0);
2598 if (TREE_CODE (oprnd0) != SSA_NAME
2599 || TREE_CODE (oprnd1) != INTEGER_CST
2600 || TREE_CODE (itype) != INTEGER_TYPE
2601 || !type_has_mode_precision_p (itype))
2602 return NULL;
2604 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2605 vectype = get_vectype_for_scalar_type (itype);
2606 if (vectype == NULL_TREE)
2607 return NULL;
2609 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2611 /* If the target can handle vectorized division or modulo natively,
2612 don't attempt to optimize this, since native division is likely
2613 to give smaller code. */
2614 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2615 if (optab != unknown_optab)
2617 machine_mode vec_mode = TYPE_MODE (vectype);
2618 int icode = (int) optab_handler (optab, vec_mode);
2619 if (icode != CODE_FOR_nothing)
2620 return NULL;
2624 prec = TYPE_PRECISION (itype);
2625 if (integer_pow2p (oprnd1))
2627 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2628 return NULL;
2630 /* Pattern detected. */
2631 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2633 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2634 build_int_cst (itype, 0));
2635 if (rhs_code == TRUNC_DIV_EXPR)
2637 tree var = vect_recog_temp_ssa_var (itype, NULL);
2638 tree shift;
2639 def_stmt
2640 = gimple_build_assign (var, COND_EXPR, cond,
2641 fold_build2 (MINUS_EXPR, itype, oprnd1,
2642 build_int_cst (itype, 1)),
2643 build_int_cst (itype, 0));
2644 new_pattern_def_seq (stmt_vinfo, def_stmt);
2645 var = vect_recog_temp_ssa_var (itype, NULL);
2646 def_stmt
2647 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2648 gimple_assign_lhs (def_stmt));
2649 append_pattern_def_seq (stmt_vinfo, def_stmt);
2651 shift = build_int_cst (itype, tree_log2 (oprnd1));
2652 pattern_stmt
2653 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2654 RSHIFT_EXPR, var, shift);
2656 else
2658 tree signmask;
2659 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2660 if (compare_tree_int (oprnd1, 2) == 0)
2662 signmask = vect_recog_temp_ssa_var (itype, NULL);
2663 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2664 build_int_cst (itype, 1),
2665 build_int_cst (itype, 0));
2666 append_pattern_def_seq (stmt_vinfo, def_stmt);
2668 else
2670 tree utype
2671 = build_nonstandard_integer_type (prec, 1);
2672 tree vecutype = get_vectype_for_scalar_type (utype);
2673 tree shift
2674 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2675 - tree_log2 (oprnd1));
2676 tree var = vect_recog_temp_ssa_var (utype, NULL);
2678 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2679 build_int_cst (utype, -1),
2680 build_int_cst (utype, 0));
2681 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2682 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2683 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2684 append_pattern_def_seq (stmt_vinfo, def_stmt);
2685 var = vect_recog_temp_ssa_var (utype, NULL);
2686 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2687 gimple_assign_lhs (def_stmt),
2688 shift);
2689 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2690 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2691 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2692 append_pattern_def_seq (stmt_vinfo, def_stmt);
2693 signmask = vect_recog_temp_ssa_var (itype, NULL);
2694 def_stmt
2695 = gimple_build_assign (signmask, NOP_EXPR, var);
2696 append_pattern_def_seq (stmt_vinfo, def_stmt);
2698 def_stmt
2699 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2700 PLUS_EXPR, oprnd0, signmask);
2701 append_pattern_def_seq (stmt_vinfo, def_stmt);
2702 def_stmt
2703 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2704 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2705 fold_build2 (MINUS_EXPR, itype, oprnd1,
2706 build_int_cst (itype, 1)));
2707 append_pattern_def_seq (stmt_vinfo, def_stmt);
2709 pattern_stmt
2710 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2711 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2712 signmask);
2715 stmts->safe_push (last_stmt);
2717 *type_out = vectype;
2718 return pattern_stmt;
2721 if (prec > HOST_BITS_PER_WIDE_INT
2722 || integer_zerop (oprnd1))
2723 return NULL;
2725 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2726 return NULL;
2728 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2730 if (TYPE_UNSIGNED (itype))
2732 unsigned HOST_WIDE_INT mh, ml;
2733 int pre_shift, post_shift;
2734 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2735 & GET_MODE_MASK (itype_mode));
2736 tree t1, t2, t3, t4;
2738 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2739 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2740 return NULL;
2742 /* Find a suitable multiplier and right shift count
2743 instead of multiplying with D. */
2744 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2746 /* If the suggested multiplier is more than SIZE bits, we can do better
2747 for even divisors, using an initial right shift. */
2748 if (mh != 0 && (d & 1) == 0)
2750 pre_shift = ctz_or_zero (d);
2751 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2752 &ml, &post_shift, &dummy_int);
2753 gcc_assert (!mh);
2755 else
2756 pre_shift = 0;
2758 if (mh != 0)
2760 if (post_shift - 1 >= prec)
2761 return NULL;
2763 /* t1 = oprnd0 h* ml;
2764 t2 = oprnd0 - t1;
2765 t3 = t2 >> 1;
2766 t4 = t1 + t3;
2767 q = t4 >> (post_shift - 1); */
2768 t1 = vect_recog_temp_ssa_var (itype, NULL);
2769 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2770 build_int_cst (itype, ml));
2771 append_pattern_def_seq (stmt_vinfo, def_stmt);
2773 t2 = vect_recog_temp_ssa_var (itype, NULL);
2774 def_stmt
2775 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2776 append_pattern_def_seq (stmt_vinfo, def_stmt);
2778 t3 = vect_recog_temp_ssa_var (itype, NULL);
2779 def_stmt
2780 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2781 append_pattern_def_seq (stmt_vinfo, def_stmt);
2783 t4 = vect_recog_temp_ssa_var (itype, NULL);
2784 def_stmt
2785 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2787 if (post_shift != 1)
2789 append_pattern_def_seq (stmt_vinfo, def_stmt);
2791 q = vect_recog_temp_ssa_var (itype, NULL);
2792 pattern_stmt
2793 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2794 build_int_cst (itype, post_shift - 1));
2796 else
2798 q = t4;
2799 pattern_stmt = def_stmt;
2802 else
2804 if (pre_shift >= prec || post_shift >= prec)
2805 return NULL;
2807 /* t1 = oprnd0 >> pre_shift;
2808 t2 = t1 h* ml;
2809 q = t2 >> post_shift; */
2810 if (pre_shift)
2812 t1 = vect_recog_temp_ssa_var (itype, NULL);
2813 def_stmt
2814 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2815 build_int_cst (NULL, pre_shift));
2816 append_pattern_def_seq (stmt_vinfo, def_stmt);
2818 else
2819 t1 = oprnd0;
2821 t2 = vect_recog_temp_ssa_var (itype, NULL);
2822 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2823 build_int_cst (itype, ml));
2825 if (post_shift)
2827 append_pattern_def_seq (stmt_vinfo, def_stmt);
2829 q = vect_recog_temp_ssa_var (itype, NULL);
2830 def_stmt
2831 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2832 build_int_cst (itype, post_shift));
2834 else
2835 q = t2;
2837 pattern_stmt = def_stmt;
2840 else
2842 unsigned HOST_WIDE_INT ml;
2843 int post_shift;
2844 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2845 unsigned HOST_WIDE_INT abs_d;
2846 bool add = false;
2847 tree t1, t2, t3, t4;
2849 /* Give up for -1. */
2850 if (d == -1)
2851 return NULL;
2853 /* Since d might be INT_MIN, we have to cast to
2854 unsigned HOST_WIDE_INT before negating to avoid
2855 undefined signed overflow. */
2856 abs_d = (d >= 0
2857 ? (unsigned HOST_WIDE_INT) d
2858 : - (unsigned HOST_WIDE_INT) d);
2860 /* n rem d = n rem -d */
2861 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2863 d = abs_d;
2864 oprnd1 = build_int_cst (itype, abs_d);
2866 else if (HOST_BITS_PER_WIDE_INT >= prec
2867 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2868 /* This case is not handled correctly below. */
2869 return NULL;
2871 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2872 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2874 add = true;
2875 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2877 if (post_shift >= prec)
2878 return NULL;
2880 /* t1 = oprnd0 h* ml; */
2881 t1 = vect_recog_temp_ssa_var (itype, NULL);
2882 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2883 build_int_cst (itype, ml));
2885 if (add)
2887 /* t2 = t1 + oprnd0; */
2888 append_pattern_def_seq (stmt_vinfo, def_stmt);
2889 t2 = vect_recog_temp_ssa_var (itype, NULL);
2890 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2892 else
2893 t2 = t1;
2895 if (post_shift)
2897 /* t3 = t2 >> post_shift; */
2898 append_pattern_def_seq (stmt_vinfo, def_stmt);
2899 t3 = vect_recog_temp_ssa_var (itype, NULL);
2900 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2901 build_int_cst (itype, post_shift));
2903 else
2904 t3 = t2;
2906 wide_int oprnd0_min, oprnd0_max;
2907 int msb = 1;
2908 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2910 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2911 msb = 0;
2912 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2913 msb = -1;
2916 if (msb == 0 && d >= 0)
2918 /* q = t3; */
2919 q = t3;
2920 pattern_stmt = def_stmt;
2922 else
2924 /* t4 = oprnd0 >> (prec - 1);
2925 or if we know from VRP that oprnd0 >= 0
2926 t4 = 0;
2927 or if we know from VRP that oprnd0 < 0
2928 t4 = -1; */
2929 append_pattern_def_seq (stmt_vinfo, def_stmt);
2930 t4 = vect_recog_temp_ssa_var (itype, NULL);
2931 if (msb != 1)
2932 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2933 build_int_cst (itype, msb));
2934 else
2935 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2936 build_int_cst (itype, prec - 1));
2937 append_pattern_def_seq (stmt_vinfo, def_stmt);
2939 /* q = t3 - t4; or q = t4 - t3; */
2940 q = vect_recog_temp_ssa_var (itype, NULL);
2941 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2942 d < 0 ? t3 : t4);
2946 if (rhs_code == TRUNC_MOD_EXPR)
2948 tree r, t1;
2950 /* We divided. Now finish by:
2951 t1 = q * oprnd1;
2952 r = oprnd0 - t1; */
2953 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2955 t1 = vect_recog_temp_ssa_var (itype, NULL);
2956 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2957 append_pattern_def_seq (stmt_vinfo, def_stmt);
2959 r = vect_recog_temp_ssa_var (itype, NULL);
2960 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2963 /* Pattern detected. */
2964 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2966 stmts->safe_push (last_stmt);
2968 *type_out = vectype;
2969 return pattern_stmt;
2972 /* Function vect_recog_mixed_size_cond_pattern
2974 Try to find the following pattern:
2976 type x_t, y_t;
2977 TYPE a_T, b_T, c_T;
2978 loop:
2979 S1 a_T = x_t CMP y_t ? b_T : c_T;
2981 where 'TYPE' is an integral type which has a different size
2982 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2983 than 'type', the constants need to fit into an integer type
2984 with the same width as 'type') or results of conversion from 'type'.
2986 Input:
2988 * LAST_STMT: A stmt from which the pattern search begins.
2990 Output:
2992 * TYPE_OUT: The type of the output of this pattern.
2994 * Return value: A new stmt that will be used to replace the pattern.
2995 Additionally a def_stmt is added.
2997 a_it = x_t CMP y_t ? b_it : c_it;
2998 a_T = (TYPE) a_it; */
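/* For instance (illustrative only): with "char x_t, y_t; int a_T;",
   the statement "a_T = x_t < y_t ? 17 : 0" is rewritten so that the
   COND_EXPR is computed in a char-sized integer type and only the final
   result is widened:  "a_it = x_t < y_t ? 17 : 0;  a_T = (int) a_it;",
   keeping the number of vector elements the same in both statements.  */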
3000 static gimple *
3001 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_out)
3003 gimple *last_stmt = (*stmts)[0];
3004 tree cond_expr, then_clause, else_clause;
3005 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3006 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3007 gimple *pattern_stmt, *def_stmt;
3008 vec_info *vinfo = stmt_vinfo->vinfo;
3009 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3010 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3011 bool promotion;
3012 tree comp_scalar_type;
3014 if (!is_gimple_assign (last_stmt)
3015 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3016 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3017 return NULL;
3019 cond_expr = gimple_assign_rhs1 (last_stmt);
3020 then_clause = gimple_assign_rhs2 (last_stmt);
3021 else_clause = gimple_assign_rhs3 (last_stmt);
3023 if (!COMPARISON_CLASS_P (cond_expr))
3024 return NULL;
3026 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3027 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3028 if (comp_vectype == NULL_TREE)
3029 return NULL;
3031 type = gimple_expr_type (last_stmt);
3032 if (types_compatible_p (type, comp_scalar_type)
3033 || ((TREE_CODE (then_clause) != INTEGER_CST
3034 || TREE_CODE (else_clause) != INTEGER_CST)
3035 && !INTEGRAL_TYPE_P (comp_scalar_type))
3036 || !INTEGRAL_TYPE_P (type))
3037 return NULL;
3039 if ((TREE_CODE (then_clause) != INTEGER_CST
3040 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3041 &def_stmt0, &promotion))
3042 || (TREE_CODE (else_clause) != INTEGER_CST
3043 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3044 &def_stmt1, &promotion)))
3045 return NULL;
3047 if (orig_type0 && orig_type1
3048 && !types_compatible_p (orig_type0, orig_type1))
3049 return NULL;
3051 if (orig_type0)
3053 if (!types_compatible_p (orig_type0, comp_scalar_type))
3054 return NULL;
3055 then_clause = gimple_assign_rhs1 (def_stmt0);
3056 itype = orig_type0;
3059 if (orig_type1)
3061 if (!types_compatible_p (orig_type1, comp_scalar_type))
3062 return NULL;
3063 else_clause = gimple_assign_rhs1 (def_stmt1);
3064 itype = orig_type1;
3068 HOST_WIDE_INT cmp_mode_size
3069 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3071 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3072 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3073 return NULL;
3075 vectype = get_vectype_for_scalar_type (type);
3076 if (vectype == NULL_TREE)
3077 return NULL;
3079 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3080 return NULL;
3082 if (itype == NULL_TREE)
3083 itype = build_nonstandard_integer_type (cmp_mode_size,
3084 TYPE_UNSIGNED (type));
3086 if (itype == NULL_TREE
3087 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3088 return NULL;
3090 vecitype = get_vectype_for_scalar_type (itype);
3091 if (vecitype == NULL_TREE)
3092 return NULL;
3094 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3095 return NULL;
3097 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3099 if ((TREE_CODE (then_clause) == INTEGER_CST
3100 && !int_fits_type_p (then_clause, itype))
3101 || (TREE_CODE (else_clause) == INTEGER_CST
3102 && !int_fits_type_p (else_clause, itype)))
3103 return NULL;
3106 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3107 COND_EXPR, unshare_expr (cond_expr),
3108 fold_convert (itype, then_clause),
3109 fold_convert (itype, else_clause));
3110 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3111 NOP_EXPR, gimple_assign_lhs (def_stmt));
3113 new_pattern_def_seq (stmt_vinfo, def_stmt);
3114 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3115 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3116 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3117 *type_out = vectype;
3119 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3121 return pattern_stmt;
3125 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3126 true if bool VAR can and should be optimized that way. Assume it shouldn't
3127 in case it's a result of a comparison which can be directly vectorized into
3128 a vector comparison. Fills in STMTS with all stmts visited during the
3129 walk. */
3131 static bool
3132 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3134 tree rhs1;
3135 enum tree_code rhs_code;
3137 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3138 if (!def_stmt_info)
3139 return false;
3141 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3142 if (!def_stmt)
3143 return false;
3145 if (stmts.contains (def_stmt))
3146 return true;
3148 rhs1 = gimple_assign_rhs1 (def_stmt);
3149 rhs_code = gimple_assign_rhs_code (def_stmt);
3150 switch (rhs_code)
3152 case SSA_NAME:
3153 if (! check_bool_pattern (rhs1, vinfo, stmts))
3154 return false;
3155 break;
3157 CASE_CONVERT:
3158 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3159 return false;
3160 if (! check_bool_pattern (rhs1, vinfo, stmts))
3161 return false;
3162 break;
3164 case BIT_NOT_EXPR:
3165 if (! check_bool_pattern (rhs1, vinfo, stmts))
3166 return false;
3167 break;
3169 case BIT_AND_EXPR:
3170 case BIT_IOR_EXPR:
3171 case BIT_XOR_EXPR:
3172 if (! check_bool_pattern (rhs1, vinfo, stmts)
3173 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3174 return false;
3175 break;
3177 default:
3178 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3180 tree vecitype, comp_vectype;
3182 /* If the comparison can throw, then is_gimple_condexpr will be
3183 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3184 if (stmt_could_throw_p (def_stmt))
3185 return false;
3187 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3188 if (comp_vectype == NULL_TREE)
3189 return false;
3191 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3192 if (mask_type
3193 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3194 return false;
3196 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3198 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3199 tree itype
3200 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3201 vecitype = get_vectype_for_scalar_type (itype);
3202 if (vecitype == NULL_TREE)
3203 return false;
3205 else
3206 vecitype = comp_vectype;
3207 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3208 return false;
3210 else
3211 return false;
3212 break;
3215 bool res = stmts.add (def_stmt);
3216 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3217 gcc_assert (!res);
3219 return true;
3223 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE of the
3224 result of a previous stmt (SSA_NAME_DEF_STMT of VAR), appending the
3225 cast to STMT_INFO's pattern def sequence and returning its lhs. */
3227 static tree
3228 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3230 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3231 NOP_EXPR, var);
3232 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3233 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3234 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3235 append_pattern_def_seq (stmt_info, cast_stmt);
3236 return gimple_assign_lhs (cast_stmt);
3239 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3240 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3241 type, OUT_TYPE is the desired final integer type of the whole pattern.
3242 STMT_INFO is the info of the pattern root, with which the pattern stmts
3243 should be associated.  DEFS is a map of pattern defs.
3245 static void
3246 adjust_bool_pattern (tree var, tree out_type,
3247 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3249 gimple *stmt = SSA_NAME_DEF_STMT (var);
3250 enum tree_code rhs_code, def_rhs_code;
3251 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3252 location_t loc;
3253 gimple *pattern_stmt, *def_stmt;
3254 tree trueval = NULL_TREE;
3256 rhs1 = gimple_assign_rhs1 (stmt);
3257 rhs2 = gimple_assign_rhs2 (stmt);
3258 rhs_code = gimple_assign_rhs_code (stmt);
3259 loc = gimple_location (stmt);
3260 switch (rhs_code)
3262 case SSA_NAME:
3263 CASE_CONVERT:
3264 irhs1 = *defs.get (rhs1);
3265 itype = TREE_TYPE (irhs1);
3266 pattern_stmt
3267 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3268 SSA_NAME, irhs1);
3269 break;
3271 case BIT_NOT_EXPR:
3272 irhs1 = *defs.get (rhs1);
3273 itype = TREE_TYPE (irhs1);
3274 pattern_stmt
3275 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3276 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3277 break;
3279 case BIT_AND_EXPR:
3280 /* Try to optimize x = y & (a < b ? 1 : 0); into
3281 x = (a < b ? y : 0);
3283 E.g. for:
3284 bool a_b, b_b, c_b;
3285 TYPE d_T;
3287 S1 a_b = x1 CMP1 y1;
3288 S2 b_b = x2 CMP2 y2;
3289 S3 c_b = a_b & b_b;
3290 S4 d_T = (TYPE) c_b;
3292 we would normally emit:
3294 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3295 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3296 S3' c_T = a_T & b_T;
3297 S4' d_T = c_T;
3299 but we can save one stmt by using the
3300 result of one of the COND_EXPRs in the other COND_EXPR and leave
3301 BIT_AND_EXPR stmt out:
3303 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3304 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3305 S4' d_T = c_T;
3307 At least when VEC_COND_EXPR is implemented using masks
3308 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3309 computes the comparison mask and ANDs it, in one case with an
3310 all-ones vector, in the other case with a vector register.
3311 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3312 often more expensive. */
3313 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3314 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3315 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3317 irhs1 = *defs.get (rhs1);
3318 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3319 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3320 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3322 rhs_code = def_rhs_code;
3323 rhs1 = def_rhs1;
3324 rhs2 = gimple_assign_rhs2 (def_stmt);
3325 trueval = irhs1;
3326 goto do_compare;
3328 else
3329 irhs2 = *defs.get (rhs2);
3330 goto and_ior_xor;
3332 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3333 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3334 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3336 irhs2 = *defs.get (rhs2);
3337 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3338 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3339 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3341 rhs_code = def_rhs_code;
3342 rhs1 = def_rhs1;
3343 rhs2 = gimple_assign_rhs2 (def_stmt);
3344 trueval = irhs2;
3345 goto do_compare;
3347 else
3348 irhs1 = *defs.get (rhs1);
3349 goto and_ior_xor;
3351 /* FALLTHRU */
3352 case BIT_IOR_EXPR:
3353 case BIT_XOR_EXPR:
3354 irhs1 = *defs.get (rhs1);
3355 irhs2 = *defs.get (rhs2);
3356 and_ior_xor:
3357 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3358 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3360 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3361 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3362 int out_prec = TYPE_PRECISION (out_type);
3363 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3364 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3365 stmt_info);
3366 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3367 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3368 stmt_info);
3369 else
3371 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3372 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3375 itype = TREE_TYPE (irhs1);
3376 pattern_stmt
3377 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3378 rhs_code, irhs1, irhs2);
3379 break;
3381 default:
3382 do_compare:
3383 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3384 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3385 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3386 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3387 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3389 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3390 itype
3391 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3393 else
3394 itype = TREE_TYPE (rhs1);
3395 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3396 if (trueval == NULL_TREE)
3397 trueval = build_int_cst (itype, 1);
3398 else
3399 gcc_checking_assert (useless_type_conversion_p (itype,
3400 TREE_TYPE (trueval)));
3401 pattern_stmt
3402 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3403 COND_EXPR, cond_expr, trueval,
3404 build_int_cst (itype, 0));
3405 break;
3408 gimple_set_location (pattern_stmt, loc);
3409 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3410 pattern def seq stmts instead of just letting auto-detection do
3411 its work? */
3412 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3413 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3414 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3415 append_pattern_def_seq (stmt_info, pattern_stmt);
3416 defs.put (var, gimple_assign_lhs (pattern_stmt));
3419 /* Comparison function to qsort a vector of gimple stmts by UID. */
3421 static int
3422 sort_after_uid (const void *p1, const void *p2)
3424 const gimple *stmt1 = *(const gimple * const *)p1;
3425 const gimple *stmt2 = *(const gimple * const *)p2;
3426 return gimple_uid (stmt1) - gimple_uid (stmt2);
3429 /* Create pattern stmts for all stmts participating in the bool pattern
3430 specified by BOOL_STMT_SET and its root STMT with the desired type
3431 OUT_TYPE. Return the def of the pattern root. */
3433 static tree
3434 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3435 tree out_type, gimple *stmt)
3437 /* Gather original stmts in the bool pattern in their order of appearance
3438 in the IL. */
3439 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3440 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3441 i != bool_stmt_set.end (); ++i)
3442 bool_stmts.quick_push (*i);
3443 bool_stmts.qsort (sort_after_uid);
3445 /* Now process them in that order, producing pattern stmts. */
3446 hash_map <tree, tree> defs;
3447 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3448 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3449 out_type, vinfo_for_stmt (stmt), defs);
3451 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3452 gimple *pattern_stmt
3453 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3454 return gimple_assign_lhs (pattern_stmt);
3457 /* Helper for search_type_for_mask. */
3459 static tree
3460 search_type_for_mask_1 (tree var, vec_info *vinfo,
3461 hash_map<gimple *, tree> &cache)
3463 tree rhs1;
3464 enum tree_code rhs_code;
3465 tree res = NULL_TREE, res2;
3467 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3468 return NULL_TREE;
3470 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3471 if (!def_stmt_info)
3472 return NULL_TREE;
3474 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3475 if (!def_stmt)
3476 return NULL_TREE;
3478 tree *c = cache.get (def_stmt);
3479 if (c)
3480 return *c;
3482 rhs_code = gimple_assign_rhs_code (def_stmt);
3483 rhs1 = gimple_assign_rhs1 (def_stmt);
3485 switch (rhs_code)
3487 case SSA_NAME:
3488 case BIT_NOT_EXPR:
3489 CASE_CONVERT:
3490 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3491 break;
3493 case BIT_AND_EXPR:
3494 case BIT_IOR_EXPR:
3495 case BIT_XOR_EXPR:
3496 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3497 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3498 cache);
3499 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3500 res = res2;
3501 break;
3503 default:
3504 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3506 tree comp_vectype, mask_type;
3508 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3510 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3511 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3512 vinfo, cache);
3513 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3514 res = res2;
3515 break;
3518 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3519 if (comp_vectype == NULL_TREE)
3521 res = NULL_TREE;
3522 break;
3525 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3526 if (!mask_type
3527 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3529 res = NULL_TREE;
3530 break;
3533 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3534 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3536 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3537 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3539 else
3540 res = TREE_TYPE (rhs1);
3544 cache.put (def_stmt, res);
3545 return res;
3548 /* Return the proper type for converting bool VAR into
3549 an integer value or NULL_TREE if no such type exists.
3550 The type is chosen so that the converted value has the
3551 same number of elements as VAR's vector type. */
3553 static tree
3554 search_type_for_mask (tree var, vec_info *vinfo)
3556 hash_map<gimple *, tree> cache;
3557 return search_type_for_mask_1 (var, vinfo, cache);
3560 /* Function vect_recog_bool_pattern
3562 Try to find a pattern like the following:
3564 bool a_b, b_b, c_b, d_b, e_b;
3565 TYPE f_T;
3566 loop:
3567 S1 a_b = x1 CMP1 y1;
3568 S2 b_b = x2 CMP2 y2;
3569 S3 c_b = a_b & b_b;
3570 S4 d_b = x3 CMP3 y3;
3571 S5 e_b = c_b | d_b;
3572 S6 f_T = (TYPE) e_b;
3574 where type 'TYPE' is an integral type. Or a similar pattern
3575 ending in
3577 S6 f_Y = e_b ? r_Y : s_Y;
3579 as results from if-conversion of a complex condition.
3581 Input:
3583 * LAST_STMT: A stmt at the end from which the pattern
3584 search begins, i.e. cast of a bool to
3585 an integer type.
3587 Output:
3589 * TYPE_OUT: The type of the output of this pattern.
3591 * Return value: A new stmt that will be used to replace the pattern.
3593 Assuming the size of TYPE is the same as the size of all comparisons
3594 (otherwise some casts would be added where needed), for the above
3595 sequence we create the following related pattern stmts:
3596 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3597 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3598 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3599 S5' e_T = c_T | d_T;
3600 S6' f_T = e_T;
3602 Instead of the above S3' we could emit:
3603 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3604 S3' c_T = a_T | b_T;
3605 but the above is more efficient. */
3607 static gimple *
3608 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_out)
3610 gimple *last_stmt = stmts->pop ();
3611 enum tree_code rhs_code;
3612 tree var, lhs, rhs, vectype;
3613 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3614 stmt_vec_info new_stmt_info;
3615 vec_info *vinfo = stmt_vinfo->vinfo;
3616 gimple *pattern_stmt;
3618 if (!is_gimple_assign (last_stmt))
3619 return NULL;
3621 var = gimple_assign_rhs1 (last_stmt);
3622 lhs = gimple_assign_lhs (last_stmt);
3624 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3625 return NULL;
3627 hash_set<gimple *> bool_stmts;
3629 rhs_code = gimple_assign_rhs_code (last_stmt);
3630 if (CONVERT_EXPR_CODE_P (rhs_code))
3632 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3633 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3634 return NULL;
3635 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3636 if (vectype == NULL_TREE)
3637 return NULL;
3639 if (check_bool_pattern (var, vinfo, bool_stmts))
3641 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3642 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3643 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3644 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3645 else
3646 pattern_stmt
3647 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3649 else
3651 tree type = search_type_for_mask (var, vinfo);
3652 tree cst0, cst1, tmp;
3654 if (!type)
3655 return NULL;
3657 /* We may use the condition directly in a narrowed type, avoiding
3658 multiple cond exprs and the subsequent packing of their results,
3659 and instead perform a single cond with a packed mask. In the
3660 widening case it is better to do the cond first and then extract
3661 the results. */
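/* A sketch of what is built below (names are illustrative only):

     tmp = var ? 1 : 0;           // computed in TYPE, the mask's natural type
     lhs' = (TYPE OF LHS) tmp;    // only if TYPE differs from LHS's type

   where the first statement is added to the pattern definition sequence
   when the conversion is needed.  */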
3662 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3663 type = TREE_TYPE (lhs);
3665 cst0 = build_int_cst (type, 0);
3666 cst1 = build_int_cst (type, 1);
3667 tmp = vect_recog_temp_ssa_var (type, NULL);
3668 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3670 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3672 tree new_vectype = get_vectype_for_scalar_type (type);
3673 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3674 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3675 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3676 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3678 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3679 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3683 *type_out = vectype;
3684 stmts->safe_push (last_stmt);
3685 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3687 return pattern_stmt;
3689 else if (rhs_code == COND_EXPR
3690 && TREE_CODE (var) == SSA_NAME)
3692 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3693 if (vectype == NULL_TREE)
3694 return NULL;
3696 /* Build a scalar type for the boolean result that when
3697 vectorized matches the vector type of the result in
3698 size and number of elements. */
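/* For example (illustrative): if VECTYPE is a 128-bit vector of four
   32-bit ints, the precision computed below is 128 / 4 = 32 and TYPE
   is a 32-bit integer type.  */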
3699 unsigned prec
3700 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3701 TYPE_VECTOR_SUBPARTS (vectype));
3703 tree type
3704 = build_nonstandard_integer_type (prec,
3705 TYPE_UNSIGNED (TREE_TYPE (var)));
3706 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3707 return NULL;
3709 if (!check_bool_pattern (var, vinfo, bool_stmts))
3710 return NULL;
3712 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3714 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3715 pattern_stmt
3716 = gimple_build_assign (lhs, COND_EXPR,
3717 build2 (NE_EXPR, boolean_type_node,
3718 rhs, build_int_cst (type, 0)),
3719 gimple_assign_rhs2 (last_stmt),
3720 gimple_assign_rhs3 (last_stmt));
3721 *type_out = vectype;
3722 stmts->safe_push (last_stmt);
3723 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3725 return pattern_stmt;
3727 else if (rhs_code == SSA_NAME
3728 && STMT_VINFO_DATA_REF (stmt_vinfo))
3730 stmt_vec_info pattern_stmt_info;
3731 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3732 gcc_assert (vectype != NULL_TREE);
3733 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3734 return NULL;
3736 if (check_bool_pattern (var, vinfo, bool_stmts))
3737 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3738 else
3740 tree type = search_type_for_mask (var, vinfo);
3741 tree cst0, cst1, new_vectype;
3743 if (!type)
3744 return NULL;
3746 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3747 type = TREE_TYPE (vectype);
3749 cst0 = build_int_cst (type, 0);
3750 cst1 = build_int_cst (type, 1);
3751 new_vectype = get_vectype_for_scalar_type (type);
3753 rhs = vect_recog_temp_ssa_var (type, NULL);
3754 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3756 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3757 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3758 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3759 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3762 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3763 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3765 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3766 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3767 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3768 rhs = rhs2;
3770 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3771 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3772 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3773 STMT_VINFO_DATA_REF (pattern_stmt_info)
3774 = STMT_VINFO_DATA_REF (stmt_vinfo);
3775 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3776 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3777 *type_out = vectype;
3778 stmts->safe_push (last_stmt);
3779 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3781 return pattern_stmt;
3783 else
3784 return NULL;
3788 /* A helper for vect_recog_mask_conversion_pattern. Build
3789 conversion of MASK to a type suitable for masking VECTYPE.
3790 The built statement gets the required vectype and is appended to
3791 STMT_VINFO's pattern definition sequence.
3793 Return converted mask. */
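/* A sketch of the statement built here (illustrative only):

     tmp = (BOOL_TYPE) mask;

   where BOOL_TYPE is the scalar boolean type whose truth vector type
   has the same number of elements as VECTYPE.  */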
3795 static tree
3796 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3797 vec_info *vinfo)
3799 gimple *stmt;
3800 tree masktype, tmp;
3801 stmt_vec_info new_stmt_info;
3803 masktype = build_same_sized_truth_vector_type (vectype);
3804 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3805 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3806 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3807 set_vinfo_for_stmt (stmt, new_stmt_info);
3808 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3809 append_pattern_def_seq (stmt_vinfo, stmt);
3811 return tmp;
3815 /* Function vect_recog_mask_conversion_pattern
3817 Try to find statements which require boolean type
3818 conversion. Additional conversion statements are
3819 added to handle such cases. For example:
3821 bool m_1, m_2, m_3;
3822 int i_4, i_5;
3823 double d_6, d_7;
3824 char c_1, c_2, c_3;
3826 S1 m_1 = i_4 > i_5;
3827 S2 m_2 = d_6 < d_7;
3828 S3 m_3 = m_1 & m_2;
3829 S4 c_1 = m_3 ? c_2 : c_3;
3831 Will be transformed into:
3833 S1 m_1 = i_4 > i_5;
3834 S2 m_2 = d_6 < d_7;
3835 S3'' m_2' = (_Bool[bitsize=32])m_2
3836 S3' m_3' = m_1 & m_2';
3837 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3838 S4' c_1' = m_3'' ? c_2 : c_3; */
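/* The same kind of conversion is applied to the mask operand of
   IFN_MASK_LOAD and IFN_MASK_STORE calls.  A sketch (illustrative only):

     S1  m_1 = i_4 > i_5;
     S2  d_6 = .MASK_LOAD (ptr, align, m_1);

   becomes

     S1   m_1 = i_4 > i_5;
     S2'' m_1' = (<mask type matching d_6's vector type>) m_1;
     S2'  d_6' = .MASK_LOAD (ptr, align, m_1');  */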
3840 static gimple *
3841 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_out)
3843 gimple *last_stmt = stmts->pop ();
3844 enum tree_code rhs_code;
3845 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3846 tree vectype1, vectype2;
3847 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3848 stmt_vec_info pattern_stmt_info;
3849 vec_info *vinfo = stmt_vinfo->vinfo;
3851 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3852 if (is_gimple_call (last_stmt)
3853 && gimple_call_internal_p (last_stmt)
3854 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3855 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3857 gcall *pattern_stmt;
3858 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3860 if (load)
3862 lhs = gimple_call_lhs (last_stmt);
3863 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3865 else
3867 rhs2 = gimple_call_arg (last_stmt, 3);
3868 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3871 rhs1 = gimple_call_arg (last_stmt, 2);
3872 rhs1_type = search_type_for_mask (rhs1, vinfo);
3873 if (!rhs1_type)
3874 return NULL;
3875 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3877 if (!vectype1 || !vectype2
3878 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3879 TYPE_VECTOR_SUBPARTS (vectype2)))
3880 return NULL;
3882 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3884 if (load)
3886 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3887 pattern_stmt
3888 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3889 gimple_call_arg (last_stmt, 0),
3890 gimple_call_arg (last_stmt, 1),
3891 tmp);
3892 gimple_call_set_lhs (pattern_stmt, lhs);
3894 else
3895 pattern_stmt
3896 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3897 gimple_call_arg (last_stmt, 0),
3898 gimple_call_arg (last_stmt, 1),
3899 tmp,
3900 gimple_call_arg (last_stmt, 3));
3902 gimple_call_set_nothrow (pattern_stmt, true);
3904 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3905 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3906 STMT_VINFO_DATA_REF (pattern_stmt_info)
3907 = STMT_VINFO_DATA_REF (stmt_vinfo);
3908 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3909 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3910 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
3911 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
3913 *type_out = vectype1;
3914 stmts->safe_push (last_stmt);
3915 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3917 return pattern_stmt;
3920 if (!is_gimple_assign (last_stmt))
3921 return NULL;
3923 gimple *pattern_stmt;
3924 lhs = gimple_assign_lhs (last_stmt);
3925 rhs1 = gimple_assign_rhs1 (last_stmt);
3926 rhs_code = gimple_assign_rhs_code (last_stmt);
3928 /* Check for cond expression requiring mask conversion. */
3929 if (rhs_code == COND_EXPR)
3931 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3933 if (TREE_CODE (rhs1) == SSA_NAME)
3935 rhs1_type = search_type_for_mask (rhs1, vinfo);
3936 if (!rhs1_type)
3937 return NULL;
3939 else if (COMPARISON_CLASS_P (rhs1))
3941 /* Check whether we're comparing scalar booleans and (if so)
3942 whether a better mask type exists than the mask associated
3943 with boolean-sized elements. This avoids unnecessary packs
3944 and unpacks if the booleans are set from comparisons of
3945 wider types. E.g. in:
3947 int x1, x2, x3, x4, y1, y2;
3949 bool b1 = (x1 == x2);
3950 bool b2 = (x3 == x4);
3951 ... = b1 == b2 ? y1 : y2;
3953 it is better for b1 and b2 to use the mask type associated
3954 with int elements rather than bool (byte) elements. */
3955 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3956 if (!rhs1_type)
3957 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3959 else
3960 return NULL;
3962 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3964 if (!vectype1 || !vectype2)
3965 return NULL;
3967 /* Continue if a conversion is needed. Also continue if we have
3968 a comparison whose vector type would normally be different from
3969 VECTYPE2 when considered in isolation. In that case we'll
3970 replace the comparison with an SSA name (so that we can record
3971 its vector type) and behave as though the comparison was an SSA
3972 name from the outset. */
3973 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3974 TYPE_VECTOR_SUBPARTS (vectype2))
3975 && (TREE_CODE (rhs1) == SSA_NAME
3976 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
3977 return NULL;
3979 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
3980 in place; we can handle it in vectorizable_condition. This avoids
3981 unnecessary promotion stmts and an increased vectorization factor. */
3982 if (COMPARISON_CLASS_P (rhs1)
3983 && INTEGRAL_TYPE_P (rhs1_type)
3984 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
3985 TYPE_VECTOR_SUBPARTS (vectype2)))
3987 enum vect_def_type dt;
3988 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
3989 && dt == vect_external_def
3990 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
3991 && (dt == vect_external_def
3992 || dt == vect_constant_def))
3994 tree wide_scalar_type = build_nonstandard_integer_type
3995 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
3996 TYPE_UNSIGNED (rhs1_type));
3997 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
3998 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
3999 return NULL;
4003 /* If rhs1 is a comparison we need to move it into a
4004 separate statement. */
4005 if (TREE_CODE (rhs1) != SSA_NAME)
4007 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4008 pattern_stmt = gimple_build_assign (tmp, rhs1);
4009 rhs1 = tmp;
4011 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4012 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4013 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4014 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4017 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4018 TYPE_VECTOR_SUBPARTS (vectype2)))
4019 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4020 else
4021 tmp = rhs1;
4023 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4024 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4025 gimple_assign_rhs2 (last_stmt),
4026 gimple_assign_rhs3 (last_stmt));
4028 *type_out = vectype1;
4029 stmts->safe_push (last_stmt);
4030 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4032 return pattern_stmt;
4035 /* Now check for binary boolean operations requiring conversion for
4036 one of operands. */
4037 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4038 return NULL;
4040 if (rhs_code != BIT_IOR_EXPR
4041 && rhs_code != BIT_XOR_EXPR
4042 && rhs_code != BIT_AND_EXPR
4043 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4044 return NULL;
4046 rhs2 = gimple_assign_rhs2 (last_stmt);
4048 rhs1_type = search_type_for_mask (rhs1, vinfo);
4049 rhs2_type = search_type_for_mask (rhs2, vinfo);
4051 if (!rhs1_type || !rhs2_type
4052 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4053 return NULL;
4055 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4057 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4058 if (!vectype1)
4059 return NULL;
4060 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4062 else
4064 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4065 if (!vectype1)
4066 return NULL;
4067 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4070 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4071 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4073 *type_out = vectype1;
4074 stmts->safe_push (last_stmt);
4075 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4077 return pattern_stmt;
4080 /* STMT is a load or store. If the load or store is conditional, return
4081 the boolean condition under which it occurs, otherwise return null. */
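/* For example (a sketch): for the call

     .MASK_STORE (ptr, align, mask_1, value_2)

   this returns mask_1, whereas an unconditional load or store
   (a plain assignment) returns NULL_TREE.  */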
4083 static tree
4084 vect_get_load_store_mask (gimple *stmt)
4086 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4088 gcc_assert (gimple_assign_single_p (def_assign));
4089 return NULL_TREE;
4092 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4094 internal_fn ifn = gimple_call_internal_fn (def_call);
4095 int mask_index = internal_fn_mask_index (ifn);
4096 return gimple_call_arg (def_call, mask_index);
4099 gcc_unreachable ();
4102 /* Return the scalar offset type that an internal gather/scatter function
4103 should use. GS_INFO describes the gather/scatter operation. */
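/* For example (illustrative): when gathering 64-bit elements with a
   32-bit signed offset, this returns a 64-bit signed integer type,
   i.e. the offset will be sign-extended to the element width.  */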
4105 static tree
4106 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4108 tree offset_type = TREE_TYPE (gs_info->offset);
4109 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4111 /* Enforced by vect_check_gather_scatter. */
4112 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4113 gcc_assert (element_bits >= offset_bits);
4115 /* If the offset is narrower than the elements, extend it according
4116 to its sign. */
4117 if (element_bits > offset_bits)
4118 return build_nonstandard_integer_type (element_bits,
4119 TYPE_UNSIGNED (offset_type));
4121 return offset_type;
4124 /* Return MASK if MASK is suitable for masking an operation on vectors
4125 of type VECTYPE, otherwise convert it into such a form and return
4126 the result. Associate any conversion statements with STMT_INFO's
4127 pattern. */
4129 static tree
4130 vect_convert_mask_for_vectype (tree mask, tree vectype,
4131 stmt_vec_info stmt_info, vec_info *vinfo)
4133 tree mask_type = search_type_for_mask (mask, vinfo);
4134 if (mask_type)
4136 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4137 if (mask_vectype
4138 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4139 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4140 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4142 return mask;
4145 /* Return the equivalent of:
4147 fold_convert (TYPE, VALUE)
4149 with the expectation that the operation will be vectorized.
4150 If new statements are needed, add them as pattern statements
4151 to STMT_INFO. */
4153 static tree
4154 vect_add_conversion_to_patterm (tree type, tree value,
4155 stmt_vec_info stmt_info,
4156 vec_info *vinfo)
4158 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4159 return value;
4161 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4162 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4163 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4164 set_vinfo_for_stmt (conversion, new_stmt_info);
4165 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4166 append_pattern_def_seq (stmt_info, conversion);
4167 return new_value;
4170 /* Try to convert STMT into a call to a gather load or scatter store
4171 internal function. Return the final statement on success and set
4172 *TYPE_OUT to the vector type being loaded or stored.
4174 This function only handles gathers and scatters that were recognized
4175 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
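/* A sketch of the transformation (illustrative only): a gather load

     x_1 = a[idx_2];                         // gather/scatter access

   might become

     x_3 = .GATHER_LOAD (base_4, offset_5, scale);

   or, if the access is conditional,

     x_3 = .MASK_GATHER_LOAD (base_4, offset_5, scale, mask_6);

   with the offset converted to the element width beforehand.  */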
4177 static gimple *
4178 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4179 tree *type_out)
4181 /* Currently we only support this for loop vectorization. */
4182 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4183 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4184 if (!loop_vinfo)
4185 return NULL;
4187 /* Make sure that we're looking at a gather load or scatter store. */
4188 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4189 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4190 return NULL;
4192 /* Get the boolean that controls whether the load or store happens.
4193 This is null if the operation is unconditional. */
4194 tree mask = vect_get_load_store_mask (stmt);
4196 /* Make sure that the target supports an appropriate internal
4197 function for the gather/scatter operation. */
4198 gather_scatter_info gs_info;
4199 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4200 || gs_info.decl)
4201 return NULL;
4203 /* Convert the mask to the right form. */
4204 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4205 if (mask)
4206 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4207 loop_vinfo);
4209 /* Get the invariant base and non-invariant offset, converting the
4210 latter to the same width as the vector elements. */
4211 tree base = gs_info.base;
4212 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4213 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4214 last_stmt_info, loop_vinfo);
4216 /* Build the new pattern statement. */
4217 tree scale = size_int (gs_info.scale);
4218 gcall *pattern_stmt;
4219 if (DR_IS_READ (dr))
4221 if (mask != NULL)
4222 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4223 offset, scale, mask);
4224 else
4225 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4226 offset, scale);
4227 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4228 gimple_call_set_lhs (pattern_stmt, load_lhs);
4230 else
4232 tree rhs = vect_get_store_rhs (stmt);
4233 if (mask != NULL)
4234 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4235 base, offset, scale, rhs,
4236 mask);
4237 else
4238 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4239 base, offset, scale, rhs);
4241 gimple_call_set_nothrow (pattern_stmt, true);
4243 /* Copy across relevant vectorization info and associate DR with the
4244 new pattern statement instead of the original statement. */
4245 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4246 loop_vinfo);
4247 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4248 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4249 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4250 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4251 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4252 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4254 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4255 *type_out = vectype;
4256 vect_pattern_detected ("gather/scatter pattern", stmt);
4258 return pattern_stmt;
4261 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4263 static gimple *
4264 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_out)
4266 gimple *last_stmt = stmts->pop ();
4267 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4268 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4269 last_stmt_info,
4270 type_out);
4271 if (pattern_stmt)
4272 stmts->safe_push (last_stmt);
4273 return pattern_stmt;
4276 /* Return true if TYPE is a non-boolean integer type. These are the types
4277 that we want to consider for narrowing. */
4279 static bool
4280 vect_narrowable_type_p (tree type)
4282 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4285 /* Return true if the operation given by CODE can be truncated to N bits
4286 when only N bits of the output are needed. This is only true if bit N+1
4287 of the inputs has no effect on the low N bits of the result. */
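/* For example (a worked sketch): for PLUS_EXPR,

     ((a + b) & 0xff) == (((a & 0xff) + (b & 0xff)) & 0xff)

   so the addition can be done in 8 bits when only 8 bits of the result
   are needed.  The same does not hold for RSHIFT_EXPR or TRUNC_DIV_EXPR,
   where high input bits can affect low output bits.  */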
4289 static bool
4290 vect_truncatable_operation_p (tree_code code)
4292 switch (code)
4294 case PLUS_EXPR:
4295 case MINUS_EXPR:
4296 case MULT_EXPR:
4297 case BIT_AND_EXPR:
4298 case BIT_IOR_EXPR:
4299 case BIT_XOR_EXPR:
4300 case COND_EXPR:
4301 return true;
4303 default:
4304 return false;
4308 /* Record that STMT_INFO could be changed from operating on TYPE to
4309 operating on a type with the precision and sign given by PRECISION
4310 and SIGN respectively. PRECISION is an arbitrary bit precision;
4311 it might not be a whole number of bytes. */
4313 static void
4314 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4315 unsigned int precision, signop sign)
4317 /* Round the precision up to a whole number of bytes. */
4318 precision = vect_element_precision (precision);
4319 if (precision < TYPE_PRECISION (type)
4320 && (!stmt_info->operation_precision
4321 || stmt_info->operation_precision > precision))
4323 stmt_info->operation_precision = precision;
4324 stmt_info->operation_sign = sign;
4328 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4329 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4330 is an arbitrary bit precision; it might not be a whole number of bytes. */
4332 static void
4333 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4334 unsigned int min_input_precision)
4336 /* This operation in isolation only requires the inputs to have
4337 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
4338 that MIN_INPUT_PRECISION is a natural precision for the chain
4339 as a whole. E.g. consider something like:
4341 unsigned short *x, *y;
4342 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4344 The right shift can be done on unsigned chars, and only requires the
4345 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4346 approach would mean turning a natural chain of single-vector unsigned
4347 short operations into one that truncates "*x" and then extends
4348 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4349 operation and one vector for each unsigned char operation.
4350 This would be a significant pessimization.
4352 Instead only propagate the maximum of this precision and the precision
4353 required by the users of the result. This means that we don't pessimize
4354 the case above but continue to optimize things like:
4356 unsigned char *y;
4357 unsigned short *x;
4358 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4360 Here we would truncate two vectors of *x to a single vector of
4361 unsigned chars and use single-vector unsigned char operations for
4362 everything else, rather than doing two unsigned short copies of
4363 "(*x & 0xf0) >> 4" and then truncating the result. */
4364 min_input_precision = MAX (min_input_precision,
4365 stmt_info->min_output_precision);
4367 if (min_input_precision < TYPE_PRECISION (type)
4368 && (!stmt_info->min_input_precision
4369 || stmt_info->min_input_precision > min_input_precision))
4370 stmt_info->min_input_precision = min_input_precision;
4373 /* Subroutine of vect_determine_min_output_precision. Return true if
4374 we can calculate a reduced number of output bits for STMT_INFO,
4375 whose result is LHS. */
4377 static bool
4378 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4380 /* Take the maximum precision required by users of the result. */
4381 unsigned int precision = 0;
4382 imm_use_iterator iter;
4383 use_operand_p use;
4384 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4386 gimple *use_stmt = USE_STMT (use);
4387 if (is_gimple_debug (use_stmt))
4388 continue;
4389 if (!vect_stmt_in_region_p (stmt_info->vinfo, use_stmt))
4390 return false;
4391 stmt_vec_info use_stmt_info = vinfo_for_stmt (use_stmt);
4392 if (!use_stmt_info->min_input_precision)
4393 return false;
4394 precision = MAX (precision, use_stmt_info->min_input_precision);
4397 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
4400 precision);
4401 dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
4402 dump_printf (MSG_NOTE, " are significant\n");
4404 stmt_info->min_output_precision = precision;
4405 return true;
4408 /* Calculate min_output_precision for STMT_INFO. */
4410 static void
4411 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4413 /* We're only interested in statements with a narrowable result. */
4414 tree lhs = gimple_get_lhs (stmt_info->stmt);
4415 if (!lhs
4416 || TREE_CODE (lhs) != SSA_NAME
4417 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4418 return;
4420 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4421 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4424 /* Use range information to decide whether STMT (described by STMT_INFO)
4425 could be done in a narrower type. This is effectively a forward
4426 propagation, since it uses context-independent information that applies
4427 to all users of an SSA name. */
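/* For example (illustrative): if range information shows that the
   result of a 32-bit multiplication is always in [0, 255], the
   multiplication can be done on 8-bit unsigned values instead;
   vect_truncatable_operation_p guarantees that the low 8 bits of the
   result are the same either way.  */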
4429 static void
4430 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4432 tree lhs = gimple_assign_lhs (stmt);
4433 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4434 return;
4436 tree type = TREE_TYPE (lhs);
4437 if (!vect_narrowable_type_p (type))
4438 return;
4440 /* First see whether we have any useful range information for the result. */
4441 unsigned int precision = TYPE_PRECISION (type);
4442 signop sign = TYPE_SIGN (type);
4443 wide_int min_value, max_value;
4444 if (!vect_get_range_info (lhs, &min_value, &max_value))
4445 return;
4447 tree_code code = gimple_assign_rhs_code (stmt);
4448 unsigned int nops = gimple_num_ops (stmt);
4450 if (!vect_truncatable_operation_p (code))
4451 /* Check that all relevant input operands are compatible, and update
4452 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4453 for (unsigned int i = 1; i < nops; ++i)
4455 tree op = gimple_op (stmt, i);
4456 if (TREE_CODE (op) == INTEGER_CST)
4458 /* Don't require the integer to have RHS_TYPE (which it might
4459 not for things like shift amounts, etc.), but do require it
4460 to fit the type. */
4461 if (!int_fits_type_p (op, type))
4462 return;
4464 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4465 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4467 else if (TREE_CODE (op) == SSA_NAME)
4469 /* Ignore codes that don't take uniform arguments. */
4470 if (!types_compatible_p (TREE_TYPE (op), type))
4471 return;
4473 wide_int op_min_value, op_max_value;
4474 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4475 return;
4477 min_value = wi::min (min_value, op_min_value, sign);
4478 max_value = wi::max (max_value, op_max_value, sign);
4480 else
4481 return;
4484 /* Try to switch signed types for unsigned types if we can.
4485 This is better for two reasons. First, unsigned ops tend
4486 to be cheaper than signed ops. Second, it means that we can
4487 handle things like:
4489 signed char c;
4490 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4494 signed char c;
4495 unsigned short res_1 = (unsigned short) c & 0xff00;
4496 int res = (int) res_1;
4498 where the intermediate result res_1 has unsigned rather than
4499 signed type. */
4500 if (sign == SIGNED && !wi::neg_p (min_value))
4501 sign = UNSIGNED;
4503 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4504 unsigned int precision1 = wi::min_precision (min_value, sign);
4505 unsigned int precision2 = wi::min_precision (max_value, sign);
4506 unsigned int value_precision = MAX (precision1, precision2);
4507 if (value_precision >= precision)
4508 return;
4510 if (dump_enabled_p ())
4512 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4513 " without loss of precision: ",
4514 sign == SIGNED ? "signed" : "unsigned",
4515 value_precision);
4516 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4519 vect_set_operation_type (stmt_info, type, value_precision, sign);
4520 vect_set_min_input_precision (stmt_info, type, value_precision);
4523 /* Use information about the users of STMT's result to decide whether
4524 STMT (described by STMT_INFO) could be done in a narrower type.
4525 This is effectively a backward propagation. */
4527 static void
4528 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4530 tree_code code = gimple_assign_rhs_code (stmt);
4531 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4532 tree type = TREE_TYPE (gimple_op (stmt, opno));
4533 if (!vect_narrowable_type_p (type))
4534 return;
4536 unsigned int precision = TYPE_PRECISION (type);
4537 unsigned int operation_precision, min_input_precision;
4538 switch (code)
4540 CASE_CONVERT:
4541 /* Only the bits that contribute to the output matter. Don't change
4542 the precision of the operation itself. */
4543 operation_precision = precision;
4544 min_input_precision = stmt_info->min_output_precision;
4545 break;
4547 case LSHIFT_EXPR:
4548 case RSHIFT_EXPR:
4550 tree shift = gimple_assign_rhs2 (stmt);
4551 if (TREE_CODE (shift) != INTEGER_CST
4552 || !wi::ltu_p (wi::to_widest (shift), precision))
4553 return;
4554 unsigned int const_shift = TREE_INT_CST_LOW (shift);
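/* Worked example (illustrative): if only the low 8 bits of the result
   are needed and CONST_SHIFT is 3, then for x << 3 only the low
   8 - 3 = 5 bits of x matter, whereas for x >> 3 the shift has to be
   done on at least 8 + 3 = 11 bits of x.  */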
4555 if (code == LSHIFT_EXPR)
4557 /* We need CONST_SHIFT fewer bits of the input. */
4558 operation_precision = stmt_info->min_output_precision;
4559 min_input_precision = (MAX (operation_precision, const_shift)
4560 - const_shift);
4562 else
4564 /* We need CONST_SHIFT extra bits to do the operation. */
4565 operation_precision = (stmt_info->min_output_precision
4566 + const_shift);
4567 min_input_precision = operation_precision;
4569 break;
4572 default:
4573 if (vect_truncatable_operation_p (code))
4575 /* Input bit N has no effect on output bits N-1 and lower. */
4576 operation_precision = stmt_info->min_output_precision;
4577 min_input_precision = operation_precision;
4578 break;
4580 return;
4583 if (operation_precision < precision)
4585 if (dump_enabled_p ())
4587 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4588 " without affecting users: ",
4589 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4590 operation_precision);
4591 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4593 vect_set_operation_type (stmt_info, type, operation_precision,
4594 TYPE_SIGN (type));
4596 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4599 /* Handle vect_determine_precisions for STMT_INFO, given that we
4600 have already done so for the users of its result. */
4602 void
4603 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4605 vect_determine_min_output_precision (stmt_info);
4606 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4608 vect_determine_precisions_from_range (stmt_info, stmt);
4609 vect_determine_precisions_from_users (stmt_info, stmt);
4613 /* Walk backwards through the vectorizable region to determine the
4614 values of these fields:
4616 - min_output_precision
4617 - min_input_precision
4618 - operation_precision
4619 - operation_sign. */
4621 void
4622 vect_determine_precisions (vec_info *vinfo)
4624 DUMP_VECT_SCOPE ("vect_determine_precisions");
4626 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4628 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4629 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4630 unsigned int nbbs = loop->num_nodes;
4632 for (unsigned int i = 0; i < nbbs; i++)
4634 basic_block bb = bbs[nbbs - i - 1];
4635 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4636 !gsi_end_p (si); gsi_prev (&si))
4637 vect_determine_stmt_precisions (vinfo_for_stmt (gsi_stmt (si)));
4640 else
4642 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4643 gimple_stmt_iterator si = bb_vinfo->region_end;
4644 gimple *stmt;
4647 if (!gsi_stmt (si))
4648 si = gsi_last_bb (bb_vinfo->bb);
4649 else
4650 gsi_prev (&si);
4651 stmt = gsi_stmt (si);
4652 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4653 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4654 vect_determine_stmt_precisions (stmt_info);
4656 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4660 typedef gimple *(*vect_recog_func_ptr) (vec<gimple *> *, tree *);
4662 struct vect_recog_func
4664 vect_recog_func_ptr fn;
4665 const char *name;
4668 /* Note that ordering matters - the first pattern matching on a stmt is
4669 taken, which means the more complex patterns usually need to precede the
4670 less complex ones (widen_sum only after dot_prod or sad, for example). */
4671 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4672 { vect_recog_over_widening_pattern, "over_widening" },
4673 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4674 { vect_recog_widen_mult_pattern, "widen_mult" },
4675 { vect_recog_dot_prod_pattern, "dot_prod" },
4676 { vect_recog_sad_pattern, "sad" },
4677 { vect_recog_widen_sum_pattern, "widen_sum" },
4678 { vect_recog_pow_pattern, "pow" },
4679 { vect_recog_widen_shift_pattern, "widen_shift" },
4680 { vect_recog_rotate_pattern, "rotate" },
4681 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4682 { vect_recog_divmod_pattern, "divmod" },
4683 { vect_recog_mult_pattern, "mult" },
4684 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4685 { vect_recog_bool_pattern, "bool" },
4686 /* This must come before mask conversion, and includes the parts
4687 of mask conversion that are needed for gather and scatter
4688 internal functions. */
4689 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4690 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4693 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4695 /* Mark statements that are involved in a pattern. */
4697 static inline void
4698 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4699 tree pattern_vectype)
4701 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4702 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4704 bool old_pattern_p = is_pattern_stmt_p (orig_stmt_info);
4705 if (old_pattern_p)
4707 /* We're replacing a statement in an existing pattern definition
4708 sequence. */
4709 if (dump_enabled_p ())
4711 dump_printf_loc (MSG_NOTE, vect_location,
4712 "replacing earlier pattern ");
4713 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_stmt, 0);
4716 /* To keep the book-keeping simple, just swap the lhs of the
4717 old and new statements, so that the old one has a valid but
4718 unused lhs. */
4719 tree old_lhs = gimple_get_lhs (orig_stmt);
4720 gimple_set_lhs (orig_stmt, gimple_get_lhs (pattern_stmt));
4721 gimple_set_lhs (pattern_stmt, old_lhs);
4723 if (dump_enabled_p ())
4725 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4726 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4729 /* Switch to the statement that ORIG replaces. */
4730 orig_stmt_info
4731 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (orig_stmt_info));
4733 /* We shouldn't be replacing the main pattern statement. */
4734 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) != orig_stmt);
4737 if (def_seq)
4738 for (gimple_stmt_iterator si = gsi_start (def_seq);
4739 !gsi_end_p (si); gsi_next (&si))
4740 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4742 if (old_pattern_p)
4744 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4746 /* Insert all the new pattern statements before the original one. */
4747 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4748 gimple_stmt_iterator gsi = gsi_for_stmt (orig_stmt, orig_def_seq);
4749 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4750 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4752 /* Remove the pattern statement that this new pattern replaces. */
4753 gsi_remove (&gsi, false);
4755 else
4756 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4759 /* Function vect_pattern_recog_1
4761 Input:
4762 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4763 computation pattern.
4764 STMT: A stmt from which the pattern search should start.
4766 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4767 a sequence of statements that has the same functionality and can be
4768 used to replace STMT. It returns the last statement in the sequence
4769 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4770 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4771 statement, having first checked that the target supports the new operation
4772 in that type.
4774 This function also does some bookkeeping, as explained in the documentation
4775 for vect_pattern_recog. */
4777 static void
4778 vect_pattern_recog_1 (vect_recog_func *recog_func,
4779 gimple_stmt_iterator si,
4780 vec<gimple *> *stmts_to_replace)
4782 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4783 stmt_vec_info stmt_info;
4784 loop_vec_info loop_vinfo;
4785 tree pattern_vectype;
4786 int i;
4788 /* If this statement has already been replaced with pattern statements,
4789 leave the original statement alone, since the first match wins.
4790 Instead try to match against the definition statements that feed
4791 the main pattern statement. */
4792 stmt_info = vinfo_for_stmt (stmt);
4793 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4795 gimple_stmt_iterator gsi;
4796 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4797 !gsi_end_p (gsi); gsi_next (&gsi))
4798 vect_pattern_recog_1 (recog_func, gsi, stmts_to_replace);
4799 return;
4802 stmts_to_replace->truncate (0);
4803 stmts_to_replace->quick_push (stmt);
4804 pattern_stmt = recog_func->fn (stmts_to_replace, &pattern_vectype);
4805 if (!pattern_stmt)
4807 /* Clear related stmt info that analysis might have noted for
4808 the stmts that were to be replaced. */
4809 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4810 && (unsigned) i < stmts_to_replace->length ();
4811 i++)
4813 stmt_info = vinfo_for_stmt (stmt);
4814 if (!is_pattern_stmt_p (stmt_info))
4815 STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
4817 /* Clear any half-formed pattern definition sequence. */
4818 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4819 return;
4822 stmt = stmts_to_replace->last ();
4823 stmt_info = vinfo_for_stmt (stmt);
4824 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4825 gcc_assert (pattern_vectype);
4827 /* Found a vectorizable pattern. */
4828 if (dump_enabled_p ())
4830 dump_printf_loc (MSG_NOTE, vect_location,
4831 "%s pattern recognized: ", recog_func->name);
4832 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4835 /* Mark the stmts that are involved in the pattern. */
4836 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4838 /* Patterns cannot be vectorized using SLP, because they change the order of
4839 computation. */
4840 if (loop_vinfo)
4842 unsigned ix, ix2;
4843 gimple **elem_ptr;
4844 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4845 elem_ptr, *elem_ptr == stmt);
4848 /* It is possible that additional pattern stmts are created and inserted in
4849 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4850 relevant statements. */
4851 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4852 && (unsigned) i < (stmts_to_replace->length () - 1);
4853 i++)
4855 stmt_info = vinfo_for_stmt (stmt);
4856 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4857 if (dump_enabled_p ())
4859 dump_printf_loc (MSG_NOTE, vect_location,
4860 "additional pattern stmt: ");
4861 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4864 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4867 return;
4871 /* Function vect_pattern_recog
4873 Input:
4874 VINFO - a vec_info describing the loop or basic-block region in which we
4875 want to look for computation idioms.
4877 Output - for each computation idiom that is detected we create a new stmt
4878 that provides the same functionality and that can be vectorized. We
4879 also record some information in the stmt_vec_info of the relevant
4880 stmts, as explained below:
4882 At the entry to this function we have the following stmts, with the
4883 following initial value in the STMT_VINFO fields:
4885 stmt in_pattern_p related_stmt vec_stmt
4886 S1: a_i = .... - - -
4887 S2: a_2 = ..use(a_i).. - - -
4888 S3: a_1 = ..use(a_2).. - - -
4889 S4: a_0 = ..use(a_1).. - - -
4890 S5: ... = ..use(a_0).. - - -
4892 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4893 represented by a single stmt. We then:
4894 - create a new stmt S6 equivalent to the pattern (the stmt is not
4895 inserted into the code)
4896 - fill in the STMT_VINFO fields as follows:
4898 in_pattern_p related_stmt vec_stmt
4899 S1: a_i = .... - - -
4900 S2: a_2 = ..use(a_i).. - - -
4901 S3: a_1 = ..use(a_2).. - - -
4902 S4: a_0 = ..use(a_1).. true S6 -
4903 '---> S6: a_new = .... - S4 -
4904 S5: ... = ..use(a_0).. - - -
4906 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4907 to each other through the RELATED_STMT field).
4909 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4910 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4911 remain irrelevant unless used by stmts other than S4.
4913 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4914 (because they are marked as irrelevant). It will vectorize S6, and record
4915 a pointer to the new vector stmt VS6 from S6 (as usual).
4916 S4 will be skipped, and S5 will be vectorized as usual:
4918 in_pattern_p related_stmt vec_stmt
4919 S1: a_i = .... - - -
4920 S2: a_2 = ..use(a_i).. - - -
4921 S3: a_1 = ..use(a_2).. - - -
4922 > VS6: va_new = .... - - -
4923 S4: a_0 = ..use(a_1).. true S6 VS6
4924 '---> S6: a_new = .... - S4 VS6
4925 > VS5: ... = ..vuse(va_new).. - - -
4926 S5: ... = ..use(a_0).. - - -
4928 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4929 elsewhere), and we'll end up with:
4931 VS6: va_new = ....
4932 VS5: ... = ..vuse(va_new)..
4934 In case of more than one pattern statements, e.g., widen-mult with
4935 intermediate type:
4937 S1 a_t = ;
4938 S2 a_T = (TYPE) a_t;
4939 '--> S3: a_it = (interm_type) a_t;
4940 S4 prod_T = a_T * CONST;
4941 '--> S5: prod_T' = a_it w* CONST;
4943 there may be other users of a_T outside the pattern. In that case S2 will
4944 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4945 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4946 be recorded in S3. */
4948 void
4949 vect_pattern_recog (vec_info *vinfo)
4951 struct loop *loop;
4952 basic_block *bbs;
4953 unsigned int nbbs;
4954 gimple_stmt_iterator si;
4955 unsigned int i, j;
4956 auto_vec<gimple *, 1> stmts_to_replace;
4958 vect_determine_precisions (vinfo);
4960 DUMP_VECT_SCOPE ("vect_pattern_recog");
4962 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4964 loop = LOOP_VINFO_LOOP (loop_vinfo);
4965 bbs = LOOP_VINFO_BBS (loop_vinfo);
4966 nbbs = loop->num_nodes;
4968 /* Scan through the loop stmts, applying the pattern recognition
4969 functions starting at each stmt visited: */
4970 for (i = 0; i < nbbs; i++)
4972 basic_block bb = bbs[i];
4973 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4974 /* Scan over all generic vect_recog_xxx_pattern functions. */
4975 for (j = 0; j < NUM_PATTERNS; j++)
4976 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4977 &stmts_to_replace);
4980 else
4982 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4983 for (si = bb_vinfo->region_begin;
4984 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4986 gimple *stmt = gsi_stmt (si);
4987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4988 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4989 continue;
4991 /* Scan over all generic vect_recog_xxx_pattern functions. */
4992 for (j = 0; j < NUM_PATTERNS; j++)
4993 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4994 &stmts_to_replace);