[16/n] PR85694: Add detection of averaging operations
[official-gcc.git] / gcc / tree-vect-patterns.c
blob 51defa08627b2fa8a475e0266124335a6f2bc950
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
56 value_range_type vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
71 return true;
73 else
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
80 return false;
84 /* Report that we've found an instance of pattern PATTERN in
85 statement STMT. */
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
93 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO.
98 Set its vector type to VECTYPE if it doesn't have one already. */
100 static void
101 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
102 tree vectype)
104 stmt_vec_info pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
105 if (pattern_stmt_info == NULL)
107 pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
108 orig_stmt_info->vinfo);
109 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
111 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
113 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info->stmt;
114 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
115 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
116 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
117 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
120 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
121 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
122 have one already. */
124 static void
125 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
126 tree vectype)
128 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
129 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
130 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
133 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
134 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
135 be different from the vector type of the final pattern statement. */
137 static inline void
138 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
139 tree vectype = NULL_TREE)
141 vec_info *vinfo = stmt_info->vinfo;
142 if (vectype)
144 gcc_assert (!vinfo_for_stmt (new_stmt));
145 stmt_vec_info new_stmt_info = new_stmt_vec_info (new_stmt, vinfo);
146 set_vinfo_for_stmt (new_stmt, new_stmt_info);
147 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
149 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
150 new_stmt);
153 static inline void
154 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
156 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
157 append_pattern_def_seq (stmt_info, stmt);
160 /* The caller wants to perform new operations on vect_external variable
161 VAR, so that the result of the operations would also be vect_external.
162 Return the edge on which the operations can be performed, if one exists.
163 Return null if the operations should instead be treated as part of
164 the pattern that needs them. */
166 static edge
167 vect_get_external_def_edge (vec_info *vinfo, tree var)
169 edge e = NULL;
170 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
172 e = loop_preheader_edge (loop_vinfo->loop);
173 if (!SSA_NAME_IS_DEFAULT_DEF (var))
175 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
176 if (bb == NULL
177 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
178 e = NULL;
181 return e;
184 /* Return true if the target supports a vector version of CODE,
185 where CODE is known to map to a direct optab. ITYPE specifies
186 the type of (some of) the scalar inputs and OTYPE specifies the
187 type of the scalar result.
189 If CODE allows the inputs and outputs to have different types
190 (such as for WIDEN_SUM_EXPR), it is the input mode rather
191 than the output mode that determines the appropriate target pattern.
192 Operand 0 of the target pattern then specifies the mode that the output
193 must have.
195 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
196 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
197 is nonnull. */
199 static bool
200 vect_supportable_direct_optab_p (tree otype, tree_code code,
201 tree itype, tree *vecotype_out,
202 tree *vecitype_out = NULL)
204 tree vecitype = get_vectype_for_scalar_type (itype);
205 if (!vecitype)
206 return false;
208 tree vecotype = get_vectype_for_scalar_type (otype);
209 if (!vecotype)
210 return false;
212 optab optab = optab_for_tree_code (code, vecitype, optab_default);
213 if (!optab)
214 return false;
216 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
217 if (icode == CODE_FOR_nothing
218 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
219 return false;
221 *vecotype_out = vecotype;
222 if (vecitype_out)
223 *vecitype_out = vecitype;
224 return true;
227 /* Round bit precision PRECISION up to a full element. */
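/* For example (illustrative): a precision of 17 rounds up to 32, a
   precision of 9 rounds up to 16, and anything below BITS_PER_UNIT
   rounds up to BITS_PER_UNIT (so 3 becomes 8 on typical targets).  */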
229 static unsigned int
230 vect_element_precision (unsigned int precision)
232 precision = 1 << ceil_log2 (precision);
233 return MAX (precision, BITS_PER_UNIT);
236 /* If OP is defined by a statement that's being considered for vectorization,
237 return information about that statement, otherwise return NULL. */
239 static stmt_vec_info
240 vect_get_internal_def (vec_info *vinfo, tree op)
242 vect_def_type dt;
243 gimple *def_stmt;
244 if (TREE_CODE (op) != SSA_NAME
245 || !vect_is_simple_use (op, vinfo, &dt, &def_stmt)
246 || dt != vect_internal_def)
247 return NULL;
249 return vinfo_for_stmt (def_stmt);
252 /* Check whether NAME, an ssa-name used in USE_STMT,
253 is a result of a type promotion, such that:
254 DEF_STMT: NAME = NOP (name0)
255 If CHECK_SIGN is TRUE, check that either both types are signed or both are
256 unsigned. */
258 static bool
259 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
260 tree *orig_type, gimple **def_stmt, bool *promotion)
262 stmt_vec_info stmt_vinfo;
263 tree type = TREE_TYPE (name);
264 tree oprnd0;
265 enum vect_def_type dt;
267 stmt_vinfo = vinfo_for_stmt (use_stmt);
268 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, def_stmt))
269 return false;
271 if (dt != vect_internal_def
272 && dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 if (!*def_stmt)
276 return false;
278 if (!is_gimple_assign (*def_stmt))
279 return false;
281 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
282 return false;
284 oprnd0 = gimple_assign_rhs1 (*def_stmt);
286 *orig_type = TREE_TYPE (oprnd0);
287 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
288 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
289 return false;
291 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
292 *promotion = true;
293 else
294 *promotion = false;
296 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
297 return false;
299 return true;
302 /* Holds information about an input operand after some sign changes
303 and type promotions have been peeled away. */
304 struct vect_unpromoted_value {
305 vect_unpromoted_value ();
307 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
309 /* The value obtained after peeling away zero or more casts. */
310 tree op;
312 /* The type of OP. */
313 tree type;
315 /* The definition type of OP. */
316 vect_def_type dt;
318 /* If OP is the result of peeling at least one cast, and if the cast
319 of OP itself is a vectorizable statement, CASTER identifies that
320 statement, otherwise it is null. */
321 stmt_vec_info caster;
324 inline vect_unpromoted_value::vect_unpromoted_value ()
325 : op (NULL_TREE),
326 type (NULL_TREE),
327 dt (vect_uninitialized_def),
328 caster (NULL)
332 /* Set the operand to OP_IN, its definition type to DT_IN, and the
333 statement that casts it to CASTER_IN. */
335 inline void
336 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
337 stmt_vec_info caster_in)
339 op = op_in;
340 type = TREE_TYPE (op);
341 dt = dt_in;
342 caster = caster_in;
345 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
346 to reach some vectorizable inner operand OP', continuing as long as it
347 is possible to convert OP' back to OP using a possible sign change
348 followed by a possible promotion P. Return this OP', or null if OP is
349 not a vectorizable SSA name. If there is a promotion P, describe its
350 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
351 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
352 have more than one user.
354 A successful return means that it is possible to go from OP' to OP
355 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
356 whereas the cast from UNPROM to OP might be a promotion, a sign
357 change, or a nop.
359 E.g. say we have:
361 signed short *ptr = ...;
362 signed short C = *ptr;
363 unsigned short B = (unsigned short) C; // sign change
364 signed int A = (signed int) B; // unsigned promotion
365 ...possible other uses of A...
366 unsigned int OP = (unsigned int) A; // sign change
368 In this case it's possible to go directly from C to OP using:
370 OP = (unsigned int) (unsigned short) C;
371 +------------+ +--------------+
372 promotion sign change
374 so OP' would be C. The input to the promotion is B, so UNPROM
375 would describe B. */
377 static tree
378 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
379 vect_unpromoted_value *unprom,
380 bool *single_use_p = NULL)
382 tree res = NULL_TREE;
383 tree op_type = TREE_TYPE (op);
384 unsigned int orig_precision = TYPE_PRECISION (op_type);
385 stmt_vec_info caster = NULL;
386 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
388 /* See whether OP is simple enough to vectorize. */
389 gimple *def_stmt;
390 vect_def_type dt;
391 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt))
392 break;
394 /* If OP is the input of a demotion, skip over it to see whether
395 OP is itself the result of a promotion. If so, the combined
396 effect of the promotion and the demotion might fit the required
397 pattern, otherwise neither operation fits.
399 This copes with cases such as the result of an arithmetic
400 operation being truncated before being stored, and where that
401 arithmetic operation has been recognized as an over-widened one. */
402 if (TYPE_PRECISION (op_type) <= orig_precision)
404 /* Use OP as the UNPROM described above if we haven't yet
405 found a promotion, or if using the new input preserves the
406 sign of the previous promotion. */
407 if (!res
408 || TYPE_PRECISION (unprom->type) == orig_precision
409 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
410 unprom->set_op (op, dt, caster);
411 /* Stop if we've already seen a promotion and if this
412 conversion does more than change the sign. */
413 else if (TYPE_PRECISION (op_type)
414 != TYPE_PRECISION (unprom->type))
415 break;
417 /* The sequence now extends to OP. */
418 res = op;
421 /* See whether OP is defined by a cast. Record it as CASTER if
422 the cast is potentially vectorizable. */
423 if (!def_stmt)
424 break;
425 if (dt == vect_internal_def)
427 caster = vinfo_for_stmt (def_stmt);
428 /* Ignore pattern statements, since we don't link uses for them. */
429 if (single_use_p
430 && !STMT_VINFO_RELATED_STMT (caster)
431 && !has_single_use (res))
432 *single_use_p = false;
434 else
435 caster = NULL;
436 gassign *assign = dyn_cast <gassign *> (def_stmt);
437 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
438 break;
440 /* Continue with the input to the cast. */
441 op = gimple_assign_rhs1 (def_stmt);
442 op_type = TREE_TYPE (op);
444 return res;
447 /* OP is an integer operand to an operation that returns TYPE, and we
448 want to treat the operation as a widening one. So far we can treat
449 it as widening from *COMMON_TYPE.
451 Return true if OP is suitable for such a widening operation,
452 either widening from *COMMON_TYPE or from some supertype of it.
453 Update *COMMON_TYPE to the supertype in the latter case.
455 SHIFT_P is true if OP is a shift amount. */
457 static bool
458 vect_joust_widened_integer (tree type, bool shift_p, tree op,
459 tree *common_type)
461 /* Calculate the minimum precision required by OP, without changing
462 the sign of either operand. */
463 unsigned int precision;
464 if (shift_p)
466 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
467 return false;
468 precision = TREE_INT_CST_LOW (op);
470 else
472 precision = wi::min_precision (wi::to_widest (op),
473 TYPE_SIGN (*common_type));
474 if (precision * 2 > TYPE_PRECISION (type))
475 return false;
478 /* If OP requires a wider type, switch to that type. The checks
479 above ensure that this is still narrower than the result. */
480 precision = vect_element_precision (precision);
481 if (TYPE_PRECISION (*common_type) < precision)
482 *common_type = build_nonstandard_integer_type
483 (precision, TYPE_UNSIGNED (*common_type));
484 return true;
487 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
488 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
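/* For example (an illustrative case): if *COMMON_TYPE is unsigned char
   and NEW_TYPE is signed char, neither type can hold all values of the
   other, so the common supertype must be a 16-bit signed type; this is
   acceptable only if TYPE is at least 32 bits wide, since the common
   type must remain no more than half the width of the result.  */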
490 static bool
491 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
493 if (types_compatible_p (*common_type, new_type))
494 return true;
496 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
497 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
498 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
499 return true;
501 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
502 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
503 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
505 *common_type = new_type;
506 return true;
509 /* We have mismatched signs, with the signed type being
510 no wider than the unsigned type. In this case we need
511 a wider signed type. */
512 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
513 TYPE_PRECISION (new_type));
514 precision *= 2;
515 if (precision * 2 > TYPE_PRECISION (type))
516 return false;
518 *common_type = build_nonstandard_integer_type (precision, false);
519 return true;
522 /* Check whether STMT_INFO can be viewed as a tree of integer operations
523 in which each node either performs CODE or WIDENED_CODE, and where
524 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
525 specifies the maximum number of leaf operands. SHIFT_P says whether
526 CODE and WIDENED_CODE are some sort of shift.
528 If STMT_INFO is such a tree, return the number of leaf operands
529 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
530 to a type that (a) is narrower than the result of STMT_INFO and
531 (b) can hold all leaf operand values.
533 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
534 exists. */
536 static unsigned int
537 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
538 tree_code widened_code, bool shift_p,
539 unsigned int max_nops,
540 vect_unpromoted_value *unprom, tree *common_type)
542 /* Check for an integer operation with the right code. */
543 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
544 if (!assign)
545 return 0;
547 tree_code rhs_code = gimple_assign_rhs_code (assign);
548 if (rhs_code != code && rhs_code != widened_code)
549 return 0;
551 tree type = gimple_expr_type (assign);
552 if (!INTEGRAL_TYPE_P (type))
553 return 0;
555 /* Assume that both operands will be leaf operands. */
556 max_nops -= 2;
558 /* Check the operands. */
559 unsigned int next_op = 0;
560 for (unsigned int i = 0; i < 2; ++i)
562 vect_unpromoted_value *this_unprom = &unprom[next_op];
563 unsigned int nops = 1;
564 tree op = gimple_op (assign, i + 1);
565 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
567 /* We already have a common type from earlier operands.
568 Update it to account for OP. */
569 this_unprom->set_op (op, vect_constant_def);
570 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
571 return 0;
573 else
575 /* Only allow shifts by constants. */
576 if (shift_p && i == 1)
577 return 0;
579 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
580 this_unprom))
581 return 0;
583 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
585 /* The operand isn't widened. If STMT_INFO has the code
586 for an unwidened operation, recursively check whether
587 this operand is a node of the tree. */
588 if (rhs_code != code
589 || max_nops == 0
590 || this_unprom->dt != vect_internal_def)
591 return 0;
593 /* Give back the leaf slot allocated above now that we're
594 not treating this as a leaf operand. */
595 max_nops += 1;
597 /* Recursively process the definition of the operand. */
598 stmt_vec_info def_stmt_info
599 = vinfo_for_stmt (SSA_NAME_DEF_STMT (this_unprom->op));
600 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
601 shift_p, max_nops, this_unprom,
602 common_type);
603 if (nops == 0)
604 return 0;
606 max_nops -= nops;
608 else
610 /* Make sure that the operand is narrower than the result. */
611 if (TYPE_PRECISION (this_unprom->type) * 2
612 > TYPE_PRECISION (type))
613 return 0;
615 /* Update COMMON_TYPE for the new operand. */
616 if (i == 0)
617 *common_type = this_unprom->type;
618 else if (!vect_joust_widened_type (type, this_unprom->type,
619 common_type))
620 return 0;
623 next_op += nops;
625 return next_op;
628 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
629 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
631 static tree
632 vect_recog_temp_ssa_var (tree type, gimple *stmt)
634 return make_temp_ssa_name (type, stmt, "patt");
637 /* STMT2_INFO describes a type conversion that could be split into STMT1
638 followed by a version of STMT2_INFO that takes NEW_RHS as its first
639 input. Try to do this using pattern statements, returning true on
640 success. */
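/* For instance (an illustrative sketch, with made-up variable names):
   if STMT2_INFO is the conversion

     int x = (int) c;		// c has type signed char

   and the caller wants the intermediate value in signed short, STMT1
   would be

     short t = (short) c;

   and STMT2_INFO would be replaced by a statement equivalent to

     int x = (int) t;  */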
642 static bool
643 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
644 gimple *stmt1, tree vectype)
646 if (is_pattern_stmt_p (stmt2_info))
648 /* STMT2_INFO is part of a pattern. Get the statement to which
649 the pattern is attached. */
650 stmt_vec_info orig_stmt2_info
651 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt2_info));
652 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location,
657 "Splitting pattern statement: ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
661 /* Since STMT2_INFO is a pattern statement, we can change it
662 in-situ without worrying about changing the code for the
663 containing block. */
664 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
666 if (dump_enabled_p ())
668 dump_printf_loc (MSG_NOTE, vect_location, "into: ");
669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
670 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
674 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
675 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info->stmt)
676 /* STMT2_INFO is the actual pattern statement. Add STMT1
677 to the end of the definition sequence. */
678 gimple_seq_add_stmt_without_update (def_seq, stmt1);
679 else
681 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
682 before it. */
683 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
684 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
686 return true;
688 else
690 /* STMT2_INFO doesn't yet have a pattern. Try to create a
691 two-statement pattern now. */
692 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
693 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
694 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
695 if (!lhs_vectype)
696 return false;
698 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE, vect_location,
701 "Splitting statement: ");
702 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
705 /* Add STMT1 as a singleton pattern definition sequence. */
706 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
707 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
708 gimple_seq_add_stmt_without_update (def_seq, stmt1);
710 /* Build the second of the two pattern statements. */
711 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
712 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
713 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
715 if (dump_enabled_p ())
717 dump_printf_loc (MSG_NOTE, vect_location,
718 "into pattern statements: ");
719 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
720 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
721 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0);
724 return true;
728 /* Convert UNPROM to TYPE and return the result, adding new statements
729 to STMT_INFO's pattern definition statements if no better way is
730 available. VECTYPE is the vector form of TYPE. */
732 static tree
733 vect_convert_input (stmt_vec_info stmt_info, tree type,
734 vect_unpromoted_value *unprom, tree vectype)
736 /* Check for a no-op conversion. */
737 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
738 return unprom->op;
740 /* Allow the caller to create constant vect_unpromoted_values. */
741 if (TREE_CODE (unprom->op) == INTEGER_CST)
742 return wide_int_to_tree (type, wi::to_widest (unprom->op));
744 /* See if we can reuse an existing result. */
745 if (unprom->caster)
747 tree lhs = gimple_get_lhs (unprom->caster->stmt);
748 if (types_compatible_p (TREE_TYPE (lhs), type))
749 return lhs;
752 /* We need a new conversion statement. */
753 tree new_op = vect_recog_temp_ssa_var (type, NULL);
754 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
756 /* If the operation is the input to a vectorizable cast, try splitting
757 that cast into two, taking the required result as a mid-way point. */
758 if (unprom->caster)
760 tree lhs = gimple_get_lhs (unprom->caster->stmt);
761 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
762 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
763 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
764 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
765 return new_op;
768 /* If OP is an external value, see if we can insert the new statement
769 on an incoming edge. */
770 if (unprom->dt == vect_external_def)
771 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
773 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
774 gcc_assert (!new_bb);
775 return new_op;
778 /* As a (common) last resort, add the statement to the pattern itself. */
779 append_pattern_def_seq (stmt_info, new_stmt, vectype);
780 return new_op;
783 /* Invoke vect_convert_input for N elements of UNPROM and store the
784 result in the corresponding elements of RESULT. */
786 static void
787 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
788 tree *result, tree type, vect_unpromoted_value *unprom,
789 tree vectype)
791 for (unsigned int i = 0; i < n; ++i)
793 unsigned int j;
794 for (j = 0; j < i; ++j)
795 if (unprom[j].op == unprom[i].op)
796 break;
797 if (j < i)
798 result[i] = result[j];
799 else
800 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
804 /* The caller has created a (possibly empty) sequence of pattern definition
805 statements followed by a single statement PATTERN_STMT. Cast the result
806 of this final statement to TYPE. If a new statement is needed, add
807 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
808 and return the new statement, otherwise return PATTERN_STMT as-is.
809 VECITYPE is the vector form of PATTERN_STMT's result type. */
811 static gimple *
812 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
813 tree vecitype)
815 tree lhs = gimple_get_lhs (pattern_stmt);
816 if (!types_compatible_p (type, TREE_TYPE (lhs)))
818 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
819 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
820 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
822 return pattern_stmt;
825 /* Return true if STMT_VINFO describes a reduction for which reassociation
826 is allowed. If STMT_VINFO is part of a group, assume that it's part of
827 a reduction chain and optimistically assume that all statements
828 except the last allow reassociation. */
830 static bool
831 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
833 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
834 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
835 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
838 /* As above, but also require it to have code CODE and to be a reduction
839 in the outermost loop. When returning true, store the operands in
840 *OP0_OUT and *OP1_OUT. */
842 static bool
843 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
844 tree *op0_out, tree *op1_out)
846 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
847 if (!loop_info)
848 return false;
850 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
851 if (!assign || gimple_assign_rhs_code (assign) != code)
852 return false;
854 /* We don't allow changing the order of the computation in the inner-loop
855 when doing outer-loop vectorization. */
856 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
857 if (loop && nested_in_vect_loop_p (loop, assign))
858 return false;
860 if (!vect_reassociating_reduction_p (stmt_info))
861 return false;
863 *op0_out = gimple_assign_rhs1 (assign);
864 *op1_out = gimple_assign_rhs2 (assign);
865 return true;
868 /* Function vect_recog_dot_prod_pattern
870 Try to find the following pattern:
872 type x_t, y_t;
873 TYPE1 prod;
874 TYPE2 sum = init;
875 loop:
876 sum_0 = phi <init, sum_1>
877 S1 x_t = ...
878 S2 y_t = ...
879 S3 x_T = (TYPE1) x_t;
880 S4 y_T = (TYPE1) y_t;
881 S5 prod = x_T * y_T;
882 [S6 prod = (TYPE2) prod; #optional]
883 S7 sum_1 = prod + sum_0;
885 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
886 same size as 'TYPE1' or bigger. This is a special case of a reduction
887 computation.
889 Input:
891 * STMTS: Contains a stmt from which the pattern search begins. In the
892 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
893 will be detected.
895 Output:
897 * TYPE_OUT: The type of the output of this pattern.
899 * Return value: A new stmt that will be used to replace the sequence of
900 stmts that constitute the pattern. In this case it will be:
901 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
903 Note: The dot-prod idiom is a widening reduction pattern that is
904 vectorized without preserving all the intermediate results. It
905 produces only N/2 (widened) results (by summing up pairs of
906 intermediate results) rather than all N results. Therefore, we
907 cannot allow this pattern when we want to get all the results and in
908 the correct order (as is the case when this computation is in an
909 inner-loop nested in an outer-loop that is being vectorized). */
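/* As a concrete illustration (a sketch with made-up names, not a
   testcase from the tree):

     int
     dot_prod (short *x, short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
	 sum += x[i] * y[i];
       return sum;
     }

   Here 'type' is short and TYPE1 = TYPE2 = int, so the reduction can
   be rewritten to use DOT_PROD_EXPR <x[i], y[i], sum_0> when the
   target provides a dot-product optab for those modes.  */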
911 static gimple *
912 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_out)
914 gimple *last_stmt = (*stmts)[0];
915 tree oprnd0, oprnd1;
916 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
917 vec_info *vinfo = stmt_vinfo->vinfo;
918 tree type, half_type;
919 gimple *pattern_stmt;
920 tree var;
922 /* Look for the following pattern
923 DX = (TYPE1) X;
924 DY = (TYPE1) Y;
925 DPROD = DX * DY;
926 DDPROD = (TYPE2) DPROD;
927 sum_1 = DDPROD + sum_0;
928 In which
929 - DX is double the size of X
930 - DY is double the size of Y
931 - DX, DY, DPROD all have the same type
932 - sum is the same size as DPROD or bigger
933 - sum has been recognized as a reduction variable.
935 This is equivalent to:
936 DPROD = X w* Y; #widen mult
937 sum_1 = DPROD w+ sum_0; #widen summation
938 or
939 DPROD = X w* Y; #widen mult
940 sum_1 = DPROD + sum_0; #summation
943 /* Starting from LAST_STMT, follow the defs of its uses in search
944 of the above pattern. */
946 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
947 &oprnd0, &oprnd1))
948 return NULL;
950 type = gimple_expr_type (last_stmt);
952 vect_unpromoted_value unprom_mult;
953 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
955 /* So far so good. Since last_stmt was detected as a (summation) reduction,
956 we know that oprnd1 is the reduction variable (defined by a loop-header
957 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
958 Left to check that oprnd0 is defined by a (widen_)mult_expr */
959 if (!oprnd0)
960 return NULL;
962 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
963 if (!mult_vinfo)
964 return NULL;
966 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
967 inside the loop (in case we are analyzing an outer-loop). */
968 vect_unpromoted_value unprom0[2];
969 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
970 false, 2, unprom0, &half_type))
971 return NULL;
973 /* If there are two widening operations, make sure they agree on
974 the sign of the extension. */
975 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
976 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
977 return NULL;
979 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
981 tree half_vectype;
982 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
983 type_out, &half_vectype))
984 return NULL;
986 /* Get the inputs in the appropriate types. */
987 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
988 tree mult_oprnd[2];
989 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
990 unprom0, half_vectype);
992 var = vect_recog_temp_ssa_var (type, NULL);
993 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
994 mult_oprnd[0], mult_oprnd[1], oprnd1);
996 return pattern_stmt;
1000 /* Function vect_recog_sad_pattern
1002 Try to find the following Sum of Absolute Difference (SAD) pattern:
1004 type x_t, y_t;
1005 signed TYPE1 diff, abs_diff;
1006 TYPE2 sum = init;
1007 loop:
1008 sum_0 = phi <init, sum_1>
1009 S1 x_t = ...
1010 S2 y_t = ...
1011 S3 x_T = (TYPE1) x_t;
1012 S4 y_T = (TYPE1) y_t;
1013 S5 diff = x_T - y_T;
1014 S6 abs_diff = ABS_EXPR <diff>;
1015 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1016 S8 sum_1 = abs_diff + sum_0;
1018 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1019 same size as 'TYPE1' or bigger. This is a special case of a reduction
1020 computation.
1022 Input:
1024 * STMTS: Contains a stmt from which the pattern search begins. In the
1025 example, when this function is called with S8, the pattern
1026 {S3,S4,S5,S6,S7,S8} will be detected.
1028 Output:
1030 * TYPE_OUT: The type of the output of this pattern.
1032 * Return value: A new stmt that will be used to replace the sequence of
1033 stmts that constitute the pattern. In this case it will be:
1034 SAD_EXPR <x_t, y_t, sum_0>
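/* As a concrete illustration (a sketch with made-up names):

     int
     sad_u8 (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
	 sum += __builtin_abs (x[i] - y[i]);
       return sum;
     }

   The subtraction and absolute value happen in int ('TYPE1') and the
   accumulator is int ('TYPE2'), so the reduction can be rewritten to
   use SAD_EXPR <x[i], y[i], sum_0> when the target supports it.  */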
1037 static gimple *
1038 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_out)
1040 gimple *last_stmt = (*stmts)[0];
1041 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1042 vec_info *vinfo = stmt_vinfo->vinfo;
1043 tree half_type;
1045 /* Look for the following pattern
1046 DX = (TYPE1) X;
1047 DY = (TYPE1) Y;
1048 DDIFF = DX - DY;
1049 DAD = ABS_EXPR <DDIFF>;
1050 DAD = (TYPE2) DAD;
1051 sum_1 = DAD + sum_0;
1052 In which
1053 - DX is at least double the size of X
1054 - DY is at least double the size of Y
1055 - DX, DY, DDIFF, DAD all have the same type
1056 - sum is the same size as DAD or bigger
1057 - sum has been recognized as a reduction variable.
1059 This is equivalent to:
1060 DDIFF = X w- Y; #widen sub
1061 DAD = ABS_EXPR <DDIFF>;
1062 sum_1 = DAD w+ sum_0; #widen summation
1063 or
1064 DDIFF = X w- Y; #widen sub
1065 DAD = ABS_EXPR <DDIFF>;
1066 sum_1 = DAD + sum_0; #summation
1069 /* Starting from LAST_STMT, follow the defs of its uses in search
1070 of the above pattern. */
1072 tree plus_oprnd0, plus_oprnd1;
1073 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1074 &plus_oprnd0, &plus_oprnd1))
1075 return NULL;
1077 tree sum_type = gimple_expr_type (last_stmt);
1079 /* Any non-truncating sequence of conversions is OK here, since
1080 with a successful match, the result of the ABS(U) is known to fit
1081 within the nonnegative range of the result type. (It cannot be the
1082 negative of the minimum signed value due to the range of the widening
1083 MINUS_EXPR.) */
1084 vect_unpromoted_value unprom_abs;
1085 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1086 &unprom_abs);
1088 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1089 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1090 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1091 Then check that plus_oprnd0 is defined by an abs_expr. */
1093 if (!plus_oprnd0)
1094 return NULL;
1096 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1097 if (!abs_stmt_vinfo)
1098 return NULL;
1100 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1101 inside the loop (in case we are analyzing an outer-loop). */
1102 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1103 if (!abs_stmt
1104 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1105 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1106 return NULL;
1108 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1109 tree abs_type = TREE_TYPE (abs_oprnd);
1110 if (TYPE_UNSIGNED (abs_type))
1111 return NULL;
1113 /* Peel off conversions from the ABS input. This can involve sign
1114 changes (e.g. from an unsigned subtraction to a signed ABS input)
1115 or signed promotion, but it can't include unsigned promotion.
1116 (Note that ABS of an unsigned promotion should have been folded
1117 away before now anyway.) */
1118 vect_unpromoted_value unprom_diff;
1119 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1120 &unprom_diff);
1121 if (!abs_oprnd)
1122 return NULL;
1123 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1124 && TYPE_UNSIGNED (unprom_diff.type))
1125 return NULL;
1127 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1128 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1129 if (!diff_stmt_vinfo)
1130 return NULL;
1132 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1133 inside the loop (in case we are analyzing an outer-loop). */
1134 vect_unpromoted_value unprom[2];
1135 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1136 false, 2, unprom, &half_type))
1137 return NULL;
1139 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1141 tree half_vectype;
1142 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1143 type_out, &half_vectype))
1144 return NULL;
1146 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1147 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1148 tree sad_oprnd[2];
1149 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1150 unprom, half_vectype);
1152 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1153 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1154 sad_oprnd[1], plus_oprnd1);
1156 return pattern_stmt;
1159 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1160 so that it can be treated as though it had the form:
1162 A_TYPE a;
1163 B_TYPE b;
1164 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1165 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1166 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1167 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1168 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1170 Try to replace the pattern with:
1172 A_TYPE a;
1173 B_TYPE b;
1174 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1175 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1176 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1177 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1179 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1181 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1182 name of the pattern being matched, for dump purposes. */
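/* As an illustration (a sketch with made-up names), for ORIG_CODE
   MULT_EXPR and WIDE_CODE WIDEN_MULT_EXPR a loop such as:

     void
     widen_mult (int *restrict res, short *a, short *b, int n)
     {
       for (int i = 0; i < n; ++i)
	 res[i] = a[i] * b[i];
     }

   multiplies values promoted from HALF_TYPE short in RES_TYPE int,
   so the multiplication can be replaced by a WIDEN_MULT_EXPR on the
   short inputs.  */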
1184 static gimple *
1185 vect_recog_widen_op_pattern (vec<gimple *> *stmts, tree *type_out,
1186 tree_code orig_code, tree_code wide_code,
1187 bool shift_p, const char *name)
1189 gimple *last_stmt = stmts->pop ();
1190 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
1192 vect_unpromoted_value unprom[2];
1193 tree half_type;
1194 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1195 shift_p, 2, unprom, &half_type))
1196 return NULL;
1198 /* Pattern detected. */
1199 vect_pattern_detected (name, last_stmt);
1201 tree type = gimple_expr_type (last_stmt);
1202 tree itype = type;
1203 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1204 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1205 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1206 TYPE_UNSIGNED (half_type));
1208 /* Check target support */
1209 tree vectype = get_vectype_for_scalar_type (half_type);
1210 tree vecitype = get_vectype_for_scalar_type (itype);
1211 enum tree_code dummy_code;
1212 int dummy_int;
1213 auto_vec<tree> dummy_vec;
1214 if (!vectype
1215 || !vecitype
1216 || !supportable_widening_operation (wide_code, last_stmt,
1217 vecitype, vectype,
1218 &dummy_code, &dummy_code,
1219 &dummy_int, &dummy_vec))
1220 return NULL;
1222 *type_out = get_vectype_for_scalar_type (type);
1223 if (!*type_out)
1224 return NULL;
1226 STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info) = NULL;
1227 tree oprnd[2];
1228 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1230 tree var = vect_recog_temp_ssa_var (itype, NULL);
1231 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1232 oprnd[0], oprnd[1]);
1234 stmts->safe_push (last_stmt);
1235 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1238 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1239 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1241 static gimple *
1242 vect_recog_widen_mult_pattern (vec<gimple *> *stmts, tree *type_out)
1244 return vect_recog_widen_op_pattern (stmts, type_out, MULT_EXPR,
1245 WIDEN_MULT_EXPR, false,
1246 "vect_recog_widen_mult_pattern");
1249 /* Function vect_recog_pow_pattern
1251 Try to find the following pattern:
1253 x = POW (y, N);
1255 with POW being one of pow, powf, powi, powif and N being
1256 either 2 or 0.5.
1258 Input:
1260 * LAST_STMT: A stmt from which the pattern search begins.
1262 Output:
1264 * TYPE_OUT: The type of the output of this pattern.
1266 * Return value: A new stmt that will be used to replace the sequence of
1267 stmts that constitute the pattern. In this case it will be:
1268 x = x * x
1269 or
1270 x = sqrt (x)
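/* For example (illustrative), with pow being the standard <math.h>
   function called on a constant exponent:

     y = pow (x, 2.0);     -> y = x * x
     y = pow (x, 0.5);     -> y = sqrt (x)  */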
1273 static gimple *
1274 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_out)
1276 gimple *last_stmt = (*stmts)[0];
1277 tree base, exp;
1278 gimple *stmt;
1279 tree var;
1281 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1282 return NULL;
1284 switch (gimple_call_combined_fn (last_stmt))
1286 CASE_CFN_POW:
1287 CASE_CFN_POWI:
1288 break;
1290 default:
1291 return NULL;
1294 base = gimple_call_arg (last_stmt, 0);
1295 exp = gimple_call_arg (last_stmt, 1);
1296 if (TREE_CODE (exp) != REAL_CST
1297 && TREE_CODE (exp) != INTEGER_CST)
1299 if (flag_unsafe_math_optimizations
1300 && TREE_CODE (base) == REAL_CST
1301 && !gimple_call_internal_p (last_stmt))
1303 combined_fn log_cfn;
1304 built_in_function exp_bfn;
1305 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1307 case BUILT_IN_POW:
1308 log_cfn = CFN_BUILT_IN_LOG;
1309 exp_bfn = BUILT_IN_EXP;
1310 break;
1311 case BUILT_IN_POWF:
1312 log_cfn = CFN_BUILT_IN_LOGF;
1313 exp_bfn = BUILT_IN_EXPF;
1314 break;
1315 case BUILT_IN_POWL:
1316 log_cfn = CFN_BUILT_IN_LOGL;
1317 exp_bfn = BUILT_IN_EXPL;
1318 break;
1319 default:
1320 return NULL;
1322 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1323 tree exp_decl = builtin_decl_implicit (exp_bfn);
1324 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1325 does that, but if C is a power of 2, we want to use
1326 exp2 (log2 (C) * x) in the non-vectorized version, but for
1327 vectorization we don't have vectorized exp2. */
1328 if (logc
1329 && TREE_CODE (logc) == REAL_CST
1330 && exp_decl
1331 && lookup_attribute ("omp declare simd",
1332 DECL_ATTRIBUTES (exp_decl)))
1334 cgraph_node *node = cgraph_node::get_create (exp_decl);
1335 if (node->simd_clones == NULL)
1337 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1338 || node->definition)
1339 return NULL;
1340 expand_simd_clones (node);
1341 if (node->simd_clones == NULL)
1342 return NULL;
1344 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1345 if (!*type_out)
1346 return NULL;
1347 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1348 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1349 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1350 new_pattern_def_seq (stmt_vinfo, g);
1351 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1352 g = gimple_build_call (exp_decl, 1, def);
1353 gimple_call_set_lhs (g, res);
1354 return g;
1358 return NULL;
1361 /* We now have a pow or powi builtin function call with a constant
1362 exponent. */
1364 /* Catch squaring. */
1365 if ((tree_fits_shwi_p (exp)
1366 && tree_to_shwi (exp) == 2)
1367 || (TREE_CODE (exp) == REAL_CST
1368 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1370 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1371 TREE_TYPE (base), type_out))
1372 return NULL;
1374 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1375 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1376 return stmt;
1379 /* Catch square root. */
1380 if (TREE_CODE (exp) == REAL_CST
1381 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1383 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1384 if (*type_out
1385 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1386 OPTIMIZE_FOR_SPEED))
1388 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1389 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1390 gimple_call_set_lhs (stmt, var);
1391 gimple_call_set_nothrow (stmt, true);
1392 return stmt;
1396 return NULL;
1400 /* Function vect_recog_widen_sum_pattern
1402 Try to find the following pattern:
1404 type x_t;
1405 TYPE x_T, sum = init;
1406 loop:
1407 sum_0 = phi <init, sum_1>
1408 S1 x_t = *p;
1409 S2 x_T = (TYPE) x_t;
1410 S3 sum_1 = x_T + sum_0;
1412 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1413 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1414 a special case of a reduction computation.
1416 Input:
1418 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1419 when this function is called with S3, the pattern {S2,S3} will be detected.
1421 Output:
1423 * TYPE_OUT: The type of the output of this pattern.
1425 * Return value: A new stmt that will be used to replace the sequence of
1426 stmts that constitute the pattern. In this case it will be:
1427 WIDEN_SUM <x_t, sum_0>
1429 Note: The widening-sum idiom is a widening reduction pattern that is
1430 vectorized without preserving all the intermediate results. It
1431 produces only N/2 (widened) results (by summing up pairs of
1432 intermediate results) rather than all N results. Therefore, we
1433 cannot allow this pattern when we want to get all the results and in
1434 the correct order (as is the case when this computation is in an
1435 inner-loop nested in an outer-loop that is being vectorized). */
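/* As a concrete illustration (a sketch with made-up names):

     int
     sum_u8 (unsigned char *x, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
	 sum += x[i];
       return sum;
     }

   Here 'type' is unsigned char and TYPE is int, so the additions can
   be rewritten as WIDEN_SUM_EXPR <x[i], sum_0> if the target supports
   a widening-sum optab for those modes.  */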
1437 static gimple *
1438 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_out)
1440 gimple *last_stmt = (*stmts)[0];
1441 tree oprnd0, oprnd1;
1442 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1443 vec_info *vinfo = stmt_vinfo->vinfo;
1444 tree type;
1445 gimple *pattern_stmt;
1446 tree var;
1448 /* Look for the following pattern
1449 DX = (TYPE) X;
1450 sum_1 = DX + sum_0;
1451 In which DX is at least double the size of X, and sum_1 has been
1452 recognized as a reduction variable.
1455 /* Starting from LAST_STMT, follow the defs of its uses in search
1456 of the above pattern. */
1458 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1459 &oprnd0, &oprnd1))
1460 return NULL;
1462 type = gimple_expr_type (last_stmt);
1464 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1465 we know that oprnd1 is the reduction variable (defined by a loop-header
1466 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1467 Left to check that oprnd0 is defined by a cast from type 'type' to type
1468 'TYPE'. */
1470 vect_unpromoted_value unprom0;
1471 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1472 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1473 return NULL;
1475 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1477 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1478 type_out))
1479 return NULL;
1481 var = vect_recog_temp_ssa_var (type, NULL);
1482 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1484 return pattern_stmt;
1487 /* Recognize cases in which an operation is performed in one type WTYPE
1488 but could be done more efficiently in a narrower type NTYPE. For example,
1489 if we have:
1491 ATYPE a; // narrower than NTYPE
1492 BTYPE b; // narrower than NTYPE
1493 WTYPE aw = (WTYPE) a;
1494 WTYPE bw = (WTYPE) b;
1495 WTYPE res = aw + bw; // only uses of aw and bw
1497 then it would be more efficient to do:
1499 NTYPE an = (NTYPE) a;
1500 NTYPE bn = (NTYPE) b;
1501 NTYPE resn = an + bn;
1502 WTYPE res = (WTYPE) resn;
1504 Other situations include things like:
1506 ATYPE a; // NTYPE or narrower
1507 WTYPE aw = (WTYPE) a;
1508 WTYPE res = aw + b;
1510 when only "(NTYPE) res" is significant. In that case it's more efficient
1511 to truncate "b" and do the operation on NTYPE instead:
1513 NTYPE an = (NTYPE) a;
1514 NTYPE bn = (NTYPE) b; // truncation
1515 NTYPE resn = an + bn;
1516 WTYPE res = (WTYPE) resn;
1518 All users of "res" should then use "resn" instead, making the final
1519 statement dead (not marked as relevant). The final statement is still
1520 needed to maintain the type correctness of the IR.
1522 vect_determine_precisions has already determined the minimum
1523 precision of the operation and the minimum precision required
1524 by users of the result. */
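/* As a concrete illustration (a sketch with made-up names):

     void
     add_u8 (unsigned char *restrict res, unsigned char *a,
	     unsigned char *b, int n)
     {
       for (int i = 0; i < n; ++i)
	 res[i] = a[i] + b[i];
     }

   C promotion rules make the addition happen in int (WTYPE), but only
   the low 8 bits of each result are stored, so the addition can be
   done directly on unsigned chars (NTYPE).  */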
1526 static gimple *
1527 vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
1529 gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
1530 if (!last_stmt)
1531 return NULL;
1533 /* See whether we have found that this operation can be done on a
1534 narrower type without changing its semantics. */
1535 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
1536 unsigned int new_precision = last_stmt_info->operation_precision;
1537 if (!new_precision)
1538 return NULL;
1540 vec_info *vinfo = last_stmt_info->vinfo;
1541 tree lhs = gimple_assign_lhs (last_stmt);
1542 tree type = TREE_TYPE (lhs);
1543 tree_code code = gimple_assign_rhs_code (last_stmt);
1545 /* Keep the first operand of a COND_EXPR as-is: only the other two
1546 operands are interesting. */
1547 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1549 /* Check the operands. */
1550 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1551 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1552 unprom.quick_grow (nops);
1553 unsigned int min_precision = 0;
1554 bool single_use_p = false;
1555 for (unsigned int i = 0; i < nops; ++i)
1557 tree op = gimple_op (last_stmt, first_op + i);
1558 if (TREE_CODE (op) == INTEGER_CST)
1559 unprom[i].set_op (op, vect_constant_def);
1560 else if (TREE_CODE (op) == SSA_NAME)
1562 bool op_single_use_p = true;
1563 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1564 &op_single_use_p))
1565 return NULL;
1566 /* If:
1568 (1) N bits of the result are needed;
1569 (2) all inputs are widened from M<N bits; and
1570 (3) one operand OP is a single-use SSA name
1572 we can shift the M->N widening from OP to the output
1573 without changing the number or type of extensions involved.
1574 This then reduces the number of copies of STMT_INFO.
1576 If instead of (3) more than one operand is a single-use SSA name,
1577 shifting the extension to the output is even more of a win.
1579 If instead:
1581 (1) N bits of the result are needed;
1582 (2) one operand OP2 is widened from M2<N bits;
1583 (3) another operand OP1 is widened from M1<M2 bits; and
1584 (4) both OP1 and OP2 are single-use
1586 the choice is between:
1588 (a) truncating OP2 to M1, doing the operation on M1,
1589 and then widening the result to N
1591 (b) widening OP1 to M2, doing the operation on M2, and then
1592 widening the result to N
1594 Both shift the M2->N widening of the inputs to the output.
1595 (a) additionally shifts the M1->M2 widening to the output;
1596 it requires fewer copies of STMT_INFO but requires an extra
1597 M2->M1 truncation.
1599 Which is better will depend on the complexity and cost of
1600 STMT_INFO, which is hard to predict at this stage. However,
1601 a clear tie-breaker in favor of (b) is the fact that the
1602 truncation in (a) increases the length of the operation chain.
1604 If instead of (4) only one of OP1 or OP2 is single-use,
1605 (b) is still a win over doing the operation in N bits:
1606 it still shifts the M2->N widening on the single-use operand
1607 to the output and reduces the number of STMT_INFO copies.
1609 If neither operand is single-use then operating on fewer than
1610 N bits might lead to more extensions overall. Whether it does
1611 or not depends on global information about the vectorization
1612 region, and whether that's a good trade-off would again
1613 depend on the complexity and cost of the statements involved,
1614 as well as things like register pressure that are not normally
1615 modelled at this stage. We therefore ignore these cases
1616 and just optimize the clear single-use wins above.
1618 Thus we take the maximum precision of the unpromoted operands
1619 and record whether any operand is single-use. */
1620 if (unprom[i].dt == vect_internal_def)
1622 min_precision = MAX (min_precision,
1623 TYPE_PRECISION (unprom[i].type));
1624 single_use_p |= op_single_use_p;
1629 /* Although the operation could be done in operation_precision, we have
1630 to balance that against introducing extra truncations or extensions.
1631 Calculate the minimum precision that can be handled efficiently.
1633 The loop above determined that the operation could be handled
1634 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1635 extension from the inputs to the output without introducing more
1636 instructions, and would reduce the number of instructions required
1637 for STMT_INFO itself.
1639 vect_determine_precisions has also determined that the result only
1640 needs min_output_precision bits. Truncating by a factor of N times
1641 requires a tree of N - 1 instructions, so if TYPE is N times wider
1642 than min_output_precision, doing the operation in TYPE and truncating
1643 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1644 In contrast:
1646 - truncating the input to a unary operation and doing the operation
1647 in the new type requires at most N - 1 + 1 = N instructions per
1648 output vector
1650 - doing the same for a binary operation requires at most
1651 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1653 Both unary and binary operations require fewer instructions than
1654 this if the operands were extended from a suitable truncated form.
1655 Thus there is usually nothing to lose by doing operations in
1656 min_output_precision bits, but there can be something to gain. */
1657 if (!single_use_p)
1658 min_precision = last_stmt_info->min_output_precision;
1659 else
1660 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1662 /* Apply the minimum efficient precision we just calculated. */
1663 if (new_precision < min_precision)
1664 new_precision = min_precision;
1665 if (new_precision >= TYPE_PRECISION (type))
1666 return NULL;
1668 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1670 *type_out = get_vectype_for_scalar_type (type);
1671 if (!*type_out)
1672 return NULL;
1674 /* We've found a viable pattern. Get the new type of the operation. */
1675 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1676 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1678 /* We specifically don't check here whether the target supports the
1679 new operation, since it might be something that a later pattern
1680 wants to rewrite anyway. If targets have a minimum element size
1681 for some optabs, we should pattern-match smaller ops to larger ops
1682 where beneficial. */
1683 tree new_vectype = get_vectype_for_scalar_type (new_type);
1684 if (!new_vectype)
1685 return NULL;
1687 if (dump_enabled_p ())
1689 dump_printf_loc (MSG_NOTE, vect_location, "demoting ");
1690 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
1691 dump_printf (MSG_NOTE, " to ");
1692 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_type);
1693 dump_printf (MSG_NOTE, "\n");
1696 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1697 STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info) = NULL;
1698 tree ops[3] = {};
1699 for (unsigned int i = 1; i < first_op; ++i)
1700 ops[i - 1] = gimple_op (last_stmt, i);
1701 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1702 new_type, &unprom[0], new_vectype);
1704 /* Use the operation to produce a result of type NEW_TYPE. */
1705 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1706 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1707 ops[0], ops[1], ops[2]);
1708 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1710 if (dump_enabled_p ())
1712 dump_printf_loc (MSG_NOTE, vect_location,
1713 "created pattern stmt: ");
1714 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1717 pattern_stmt = vect_convert_output (last_stmt_info, type,
1718 pattern_stmt, new_vectype);
1720 stmts->safe_push (last_stmt);
1721 return pattern_stmt;
1724 /* Recognize the patterns:
1726 ATYPE a; // narrower than TYPE
1727 BTYPE b; // narrower than TYPE
1728 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1729 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1731 where only the bottom half of avg is used. Try to transform them into:
1733 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1734 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1736 followed by:
1738 TYPE avg = (TYPE) avg';
1740 where NTYPE is no wider than half of TYPE. Since only the bottom half
1741 of avg is used, all or part of the cast of avg' should become redundant. */
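/* Concrete illustration (an assumed example, not part of the original
   comment): with ATYPE = BTYPE = unsigned char and TYPE = int,

     int tmp = ((int) a + (int) b + 1) >> 1;
     c = (unsigned char) tmp;

   only uses the bottom 8 bits of the shift result, so NTYPE can be
   unsigned char and the computation becomes the equivalent of

     c = .AVG_CEIL (a, b);

   replacing the widening adds, the shift and the narrowing cast with a
   single rounding-average operation per vector.  */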
1743 static gimple *
1744 vect_recog_average_pattern (vec<gimple *> *stmts, tree *type_out)
1746 /* Check for a shift right by one bit. */
1747 gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
1748 if (!last_stmt
1749 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1750 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1751 return NULL;
1753 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
1754 vec_info *vinfo = last_stmt_info->vinfo;
1756 /* Check that the shift result is wider than the users of the
1757 result need (i.e. that narrowing would be a natural choice). */
1758 tree lhs = gimple_assign_lhs (last_stmt);
1759 tree type = TREE_TYPE (lhs);
1760 unsigned int target_precision
1761 = vect_element_precision (last_stmt_info->min_output_precision);
1762 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1763 return NULL;
1765 /* Get the definition of the shift input. */
1766 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1767 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1768 if (!plus_stmt_info)
1769 return NULL;
1771 /* Check whether the shift input can be seen as a tree of additions on
1772 2 or 3 widened inputs.
1774 Note that the pattern should be a win even if the result of one or
1775 more additions is reused elsewhere: if the pattern matches, we'd be
1776 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1777 internal_fn ifn = IFN_AVG_FLOOR;
1778 vect_unpromoted_value unprom[3];
1779 tree new_type;
1780 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1781 PLUS_EXPR, false, 3,
1782 unprom, &new_type);
1783 if (nops == 0)
1784 return NULL;
1785 if (nops == 3)
1787 /* Check that one operand is 1. */
1788 unsigned int i;
1789 for (i = 0; i < 3; ++i)
1790 if (integer_onep (unprom[i].op))
1791 break;
1792 if (i == 3)
1793 return NULL;
1794 /* Throw away the 1 operand and keep the other two. */
1795 if (i < 2)
1796 unprom[i] = unprom[2];
1797 ifn = IFN_AVG_CEIL;
1800 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1802 /* We know that:
1804 (a) the operation can be viewed as:
1806 TYPE widened0 = (TYPE) UNPROM[0];
1807 TYPE widened1 = (TYPE) UNPROM[1];
1808 TYPE tmp1 = widened0 + widened1 {+ 1};
1809 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1811 (b) the first two statements are equivalent to:
1813 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1814 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1816 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1817 where sensible;
1819 (d) all the operations can be performed correctly at twice the width of
1820 NEW_TYPE, due to the nature of the average operation; and
1822 (e) users of the result of the right shift need only TARGET_PRECISION
1823 bits, where TARGET_PRECISION is no more than half of TYPE's
1824 precision.
1826 Under these circumstances, the only situation in which NEW_TYPE
1827 could be narrower than TARGET_PRECISION is if widened0, widened1
1828 and an addition result are all used more than once. Thus we can
1829 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1830 as "free", whereas widening the result of the average instruction
1831 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1832 therefore better not to go narrower than TARGET_PRECISION. */
1833 if (TYPE_PRECISION (new_type) < target_precision)
1834 new_type = build_nonstandard_integer_type (target_precision,
1835 TYPE_UNSIGNED (new_type));
1837 /* Check for target support. */
1838 tree new_vectype = get_vectype_for_scalar_type (new_type);
1839 if (!new_vectype
1840 || !direct_internal_fn_supported_p (ifn, new_vectype,
1841 OPTIMIZE_FOR_SPEED))
1842 return NULL;
1844 /* The IR requires a valid vector type for the cast result, even though
1845 it's likely to be discarded. */
1846 *type_out = get_vectype_for_scalar_type (type);
1847 if (!*type_out)
1848 return NULL;
1850 /* Generate the IFN_AVG* call. */
1851 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1852 tree new_ops[2];
1853 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1854 unprom, new_vectype);
1855 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1856 new_ops[1]);
1857 gimple_call_set_lhs (average_stmt, new_var);
1858 gimple_set_location (average_stmt, gimple_location (last_stmt));
1860 if (dump_enabled_p ())
1862 dump_printf_loc (MSG_NOTE, vect_location,
1863 "created pattern stmt: ");
1864 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
1867 stmts->safe_push (last_stmt);
1868 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1871 /* Recognize cases in which the input to a cast is wider than its
1872 output, and the input is fed by a widening operation. Fold this
1873 by removing the unnecessary intermediate widening. E.g.:
1875 unsigned char a;
1876 unsigned int b = (unsigned int) a;
1877 unsigned short c = (unsigned short) b;
1881 unsigned short c = (unsigned short) a;
1883 Although this is rare in input IR, it is an expected side-effect
1884 of the over-widening pattern above.
1886 This is beneficial also for integer-to-float conversions, if the
1887 widened integer has more bits than the float, and if the unwidened
1888 input doesn't. */
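/* Illustration of the float case (an assumed example, not part of the
   original comment):

     unsigned char a;
     unsigned long long b = a;
     float c = (float) b;

   The 64-bit value b can never carry more information than a does, so the
   pattern can instead use

     float c = (float) a;

   avoiding the 8-bit to 64-bit widening altogether.  */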
1890 static gimple *
1891 vect_recog_cast_forwprop_pattern (vec<gimple *> *stmts, tree *type_out)
1893 /* Check for a cast, including an integer-to-float conversion. */
1894 gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
1895 if (!last_stmt)
1896 return NULL;
1897 tree_code code = gimple_assign_rhs_code (last_stmt);
1898 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1899 return NULL;
1901 /* Make sure that the lhs is a scalar with a natural bitsize. */
1902 tree lhs = gimple_assign_lhs (last_stmt);
1903 if (!lhs)
1904 return NULL;
1905 tree lhs_type = TREE_TYPE (lhs);
1906 scalar_mode lhs_mode;
1907 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1908 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1909 return NULL;
1911 /* Check for a narrowing operation (from a vector point of view). */
1912 tree rhs = gimple_assign_rhs1 (last_stmt);
1913 tree rhs_type = TREE_TYPE (rhs);
1914 if (!INTEGRAL_TYPE_P (rhs_type)
1915 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1916 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1917 return NULL;
1919 /* Try to find an unpromoted input. */
1920 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
1921 vec_info *vinfo = last_stmt_info->vinfo;
1922 vect_unpromoted_value unprom;
1923 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1924 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1925 return NULL;
1927 /* If the bits above RHS_TYPE matter, make sure that they're the
1928 same when extending from UNPROM as they are when extending from RHS. */
1929 if (!INTEGRAL_TYPE_P (lhs_type)
1930 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1931 return NULL;
1933 /* We can get the same result by casting UNPROM directly, to avoid
1934 the unnecessary widening and narrowing. */
1935 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1937 *type_out = get_vectype_for_scalar_type (lhs_type);
1938 if (!*type_out)
1939 return NULL;
1941 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1942 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1943 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1945 stmts->safe_push (last_stmt);
1946 return pattern_stmt;
1949 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1950 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1952 static gimple *
1953 vect_recog_widen_shift_pattern (vec<gimple *> *stmts, tree *type_out)
1955 return vect_recog_widen_op_pattern (stmts, type_out, LSHIFT_EXPR,
1956 WIDEN_LSHIFT_EXPR, true,
1957 "vect_recog_widen_shift_pattern");
1960 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1962 type a_t, b_t, c_t;
1964 S0 a_t = b_t r<< c_t;
1966 Input/Output:
1968 * STMTS: Contains a stmt from which the pattern search begins,
1969 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1970 with a sequence:
1972 S1 d_t = -c_t;
1973 S2 e_t = d_t & (B - 1);
1974 S3 f_t = b_t << c_t;
1975 S4 g_t = b_t >> e_t;
1976 S0 a_t = f_t | g_t;
1978 where B is the element bitsize of the type.
1980 Output:
1982 * TYPE_OUT: The type of the output of this pattern.
1984 * Return value: A new stmt that will be used to replace the rotate
1985 S0 stmt. */
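/* For example (illustration only, not part of the original comment), with
   a 32-bit unsigned type the replacement sequence computes

     a_t = (b_t << c_t) | (b_t >> ((-c_t) & 31));

   which matches the left rotate for every c_t in [0, 31], including
   c_t == 0, where both shift amounts are zero.  */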
1987 static gimple *
1988 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_out)
1990 gimple *last_stmt = stmts->pop ();
1991 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1992 gimple *pattern_stmt, *def_stmt;
1993 enum tree_code rhs_code;
1994 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1995 vec_info *vinfo = stmt_vinfo->vinfo;
1996 enum vect_def_type dt;
1997 optab optab1, optab2;
1998 edge ext_def = NULL;
2000 if (!is_gimple_assign (last_stmt))
2001 return NULL;
2003 rhs_code = gimple_assign_rhs_code (last_stmt);
2004 switch (rhs_code)
2006 case LROTATE_EXPR:
2007 case RROTATE_EXPR:
2008 break;
2009 default:
2010 return NULL;
2013 lhs = gimple_assign_lhs (last_stmt);
2014 oprnd0 = gimple_assign_rhs1 (last_stmt);
2015 type = TREE_TYPE (oprnd0);
2016 oprnd1 = gimple_assign_rhs2 (last_stmt);
2017 if (TREE_CODE (oprnd0) != SSA_NAME
2018 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2019 || !INTEGRAL_TYPE_P (type)
2020 || !TYPE_UNSIGNED (type))
2021 return NULL;
2023 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt))
2024 return NULL;
2026 if (dt != vect_internal_def
2027 && dt != vect_constant_def
2028 && dt != vect_external_def)
2029 return NULL;
2031 vectype = get_vectype_for_scalar_type (type);
2032 if (vectype == NULL_TREE)
2033 return NULL;
2035 /* If vector/vector or vector/scalar rotate is supported by the target,
2036 don't do anything here. */
2037 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2038 if (optab1
2039 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2040 return NULL;
2042 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2044 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2045 if (optab2
2046 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2047 return NULL;
2050 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2051 don't do anything here either. */
2052 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2053 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2054 if (!optab1
2055 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2056 || !optab2
2057 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2059 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2060 return NULL;
2061 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2062 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2063 if (!optab1
2064 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2065 || !optab2
2066 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2067 return NULL;
2070 *type_out = vectype;
2072 if (dt == vect_external_def
2073 && TREE_CODE (oprnd1) == SSA_NAME)
2074 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2076 def = NULL_TREE;
2077 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2078 if (TREE_CODE (oprnd1) == INTEGER_CST
2079 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2080 def = oprnd1;
2081 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2083 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2084 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2085 && TYPE_PRECISION (TREE_TYPE (rhs1))
2086 == TYPE_PRECISION (type))
2087 def = rhs1;
2090 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2091 if (def == NULL_TREE)
2093 def = vect_recog_temp_ssa_var (type, NULL);
2094 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2095 if (ext_def)
2097 basic_block new_bb
2098 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2099 gcc_assert (!new_bb);
2101 else
2102 append_pattern_def_seq (stmt_vinfo, def_stmt);
2104 stype = TREE_TYPE (def);
2105 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2107 if (TREE_CODE (def) == INTEGER_CST)
2109 if (!tree_fits_uhwi_p (def)
2110 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2111 || integer_zerop (def))
2112 return NULL;
2113 def2 = build_int_cst (stype,
2114 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2116 else
2118 tree vecstype = get_vectype_for_scalar_type (stype);
2119 stmt_vec_info def_stmt_vinfo;
2121 if (vecstype == NULL_TREE)
2122 return NULL;
2123 def2 = vect_recog_temp_ssa_var (stype, NULL);
2124 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2125 if (ext_def)
2127 basic_block new_bb
2128 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2129 gcc_assert (!new_bb);
2131 else
2133 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2134 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2135 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2136 append_pattern_def_seq (stmt_vinfo, def_stmt);
2139 def2 = vect_recog_temp_ssa_var (stype, NULL);
2140 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2141 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2142 gimple_assign_lhs (def_stmt), mask);
2143 if (ext_def)
2145 basic_block new_bb
2146 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2147 gcc_assert (!new_bb);
2149 else
2151 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2152 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2153 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2154 append_pattern_def_seq (stmt_vinfo, def_stmt);
2158 var1 = vect_recog_temp_ssa_var (type, NULL);
2159 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2160 ? LSHIFT_EXPR : RSHIFT_EXPR,
2161 oprnd0, def);
2162 append_pattern_def_seq (stmt_vinfo, def_stmt);
2164 var2 = vect_recog_temp_ssa_var (type, NULL);
2165 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2166 ? RSHIFT_EXPR : LSHIFT_EXPR,
2167 oprnd0, def2);
2168 append_pattern_def_seq (stmt_vinfo, def_stmt);
2170 /* Pattern detected. */
2171 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2173 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2174 var = vect_recog_temp_ssa_var (type, NULL);
2175 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2177 stmts->safe_push (last_stmt);
2178 return pattern_stmt;
2181 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2182 vectorized:
2184 type a_t;
2185 TYPE b_T, res_T;
2187 S1 a_t = ;
2188 S2 b_T = ;
2189 S3 res_T = b_T op a_t;
2191 where type 'TYPE' is a type with a different size from 'type',
2192 and op is <<, >> or rotate.
2194 Also detect cases:
2196 type a_t;
2197 TYPE b_T, c_T, res_T;
2199 S0 c_T = ;
2200 S1 a_t = (type) c_T;
2201 S2 b_T = ;
2202 S3 res_T = b_T op a_t;
2204 Input/Output:
2206 * STMTS: Contains a stmt from which the pattern search begins,
2207 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2208 with a shift/rotate which has the same type on both operands, in the
2209 second case just b_T op c_T, in the first case with added cast
2210 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2212 Output:
2214 * TYPE_OUT: The type of the output of this pattern.
2216 * Return value: A new stmt that will be used to replace the shift/rotate
2217 S3 stmt. */
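/* Illustration (an assumed example, not part of the original comment): for

     unsigned char a_t;
     unsigned int b_T;
     unsigned int res_T = b_T << a_t;

   the shift amount is rewritten, either by adding a cast of a_t to the
   pattern def sequence or by reusing c_T when a_t was converted from one,
   so that both operands of the vector shift share the element type of
   b_T.  */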
2219 static gimple *
2220 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts, tree *type_out)
2222 gimple *last_stmt = stmts->pop ();
2223 tree oprnd0, oprnd1, lhs, var;
2224 gimple *pattern_stmt;
2225 enum tree_code rhs_code;
2226 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2227 vec_info *vinfo = stmt_vinfo->vinfo;
2229 if (!is_gimple_assign (last_stmt))
2230 return NULL;
2232 rhs_code = gimple_assign_rhs_code (last_stmt);
2233 switch (rhs_code)
2235 case LSHIFT_EXPR:
2236 case RSHIFT_EXPR:
2237 case LROTATE_EXPR:
2238 case RROTATE_EXPR:
2239 break;
2240 default:
2241 return NULL;
2244 lhs = gimple_assign_lhs (last_stmt);
2245 oprnd0 = gimple_assign_rhs1 (last_stmt);
2246 oprnd1 = gimple_assign_rhs2 (last_stmt);
2247 if (TREE_CODE (oprnd0) != SSA_NAME
2248 || TREE_CODE (oprnd1) != SSA_NAME
2249 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2250 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2251 || TYPE_PRECISION (TREE_TYPE (lhs))
2252 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2253 return NULL;
2255 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2256 if (!def_vinfo)
2257 return NULL;
2259 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2260 if (*type_out == NULL_TREE)
2261 return NULL;
2263 tree def = NULL_TREE;
2264 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2265 if (def_stmt && gimple_assign_cast_p (def_stmt))
2267 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2268 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2269 && TYPE_PRECISION (TREE_TYPE (rhs1))
2270 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2272 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2273 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2274 def = rhs1;
2275 else
2277 tree mask
2278 = build_low_bits_mask (TREE_TYPE (rhs1),
2279 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2280 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2281 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2282 stmt_vec_info new_stmt_info
2283 = new_stmt_vec_info (def_stmt, vinfo);
2284 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2285 STMT_VINFO_VECTYPE (new_stmt_info)
2286 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2287 new_pattern_def_seq (stmt_vinfo, def_stmt);
2292 if (def == NULL_TREE)
2294 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2295 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2296 new_pattern_def_seq (stmt_vinfo, def_stmt);
2299 /* Pattern detected. */
2300 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2302 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2303 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2304 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2306 stmts->safe_push (last_stmt);
2307 return pattern_stmt;
2310 /* Return true iff the target has a vector optab implementing the operation
2311 CODE on type VECTYPE. */
2313 static bool
2314 target_has_vecop_for_code (tree_code code, tree vectype)
2316 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2317 return voptab
2318 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2321 /* Verify that the target has optabs of VECTYPE to perform all the steps
2322 needed by the multiplication-by-immediate synthesis algorithm described by
2323 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2324 present. Return true iff the target supports all the steps. */
2326 static bool
2327 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2328 tree vectype, bool synth_shift_p)
2330 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2331 return false;
2333 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2334 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2336 if (var == negate_variant
2337 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2338 return false;
2340 /* If we must synthesize shifts with additions make sure that vector
2341 addition is available. */
2342 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2343 return false;
2345 for (int i = 1; i < alg->ops; i++)
2347 switch (alg->op[i])
2349 case alg_shift:
2350 break;
2351 case alg_add_t_m2:
2352 case alg_add_t2_m:
2353 case alg_add_factor:
2354 if (!supports_vplus)
2355 return false;
2356 break;
2357 case alg_sub_t_m2:
2358 case alg_sub_t2_m:
2359 case alg_sub_factor:
2360 if (!supports_vminus)
2361 return false;
2362 break;
2363 case alg_unknown:
2364 case alg_m:
2365 case alg_zero:
2366 case alg_impossible:
2367 return false;
2368 default:
2369 gcc_unreachable ();
2373 return true;
2376 /* Synthesize a left shift of OP by AMNT bits using a series of additions,
2377 putting the final result in DEST. Append all statements but the last to
2378 VINFO. Return the last statement. */
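/* For instance (illustration only, not part of the original comment), a
   shift by AMNT == 3 expands to three doublings:

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   where only the last statement is returned rather than appended.  */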
2380 static gimple *
2381 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2382 stmt_vec_info vinfo)
2384 HOST_WIDE_INT i;
2385 tree itype = TREE_TYPE (op);
2386 tree prev_res = op;
2387 gcc_assert (amnt >= 0);
2388 for (i = 0; i < amnt; i++)
2390 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2391 : dest;
2392 gimple *stmt
2393 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2394 prev_res = tmp_var;
2395 if (i < amnt - 1)
2396 append_pattern_def_seq (vinfo, stmt);
2397 else
2398 return stmt;
2400 gcc_unreachable ();
2401 return NULL;
2404 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2405 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2406 the process if necessary. Append the resulting assignment statements
2407 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2408 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2409 left shifts using additions. */
2411 static tree
2412 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2413 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2415 if (integer_zerop (op2)
2416 && (code == LSHIFT_EXPR
2417 || code == PLUS_EXPR))
2419 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2420 return op1;
2423 gimple *stmt;
2424 tree itype = TREE_TYPE (op1);
2425 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2427 if (code == LSHIFT_EXPR
2428 && synth_shift_p)
2430 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2431 stmt_vinfo);
2432 append_pattern_def_seq (stmt_vinfo, stmt);
2433 return tmp_var;
2436 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2437 append_pattern_def_seq (stmt_vinfo, stmt);
2438 return tmp_var;
2441 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2442 and simple arithmetic operations to be vectorized. Record the statements
2443 produced in STMT_VINFO and return the last statement in the sequence or
2444 NULL if it's not possible to synthesize such a multiplication.
2445 This function mirrors the behavior of expand_mult_const in expmed.c but
2446 works on tree-ssa form. */
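/* Small illustration (an assumed example, not part of the original
   comment): for a signed short OP multiplied by 6, one possible synthesis
   does the work on unsigned short values,

     u  = (unsigned short) op;
     t1 = u << 1;
     t2 = t1 + (u << 2);
     res = (short) t2;

   so that any intermediate wrap-around is well-defined, with a final cast
   back to the signed type.  */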
2448 static gimple *
2449 vect_synth_mult_by_constant (tree op, tree val,
2450 stmt_vec_info stmt_vinfo)
2452 tree itype = TREE_TYPE (op);
2453 machine_mode mode = TYPE_MODE (itype);
2454 struct algorithm alg;
2455 mult_variant variant;
2456 if (!tree_fits_shwi_p (val))
2457 return NULL;
2459 /* Multiplication synthesis by shifts, adds and subs can introduce
2460 signed overflow where the original operation didn't. Perform the
2461 operations on an unsigned type and cast back to avoid this.
2462 In the future we may want to relax this for synthesis algorithms
2463 that we can prove do not cause unexpected overflow. */
2464 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2466 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2468 /* Targets that don't support vector shifts but support vector additions
2469 can synthesize shifts that way. */
2470 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2472 HOST_WIDE_INT hwval = tree_to_shwi (val);
2473 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2474 The vectorizer's benefit analysis will decide whether it's beneficial
2475 to do this. */
2476 bool possible = choose_mult_variant (mode, hwval, &alg,
2477 &variant, MAX_COST);
2478 if (!possible)
2479 return NULL;
2481 tree vectype = get_vectype_for_scalar_type (multtype);
2483 if (!vectype
2484 || !target_supports_mult_synth_alg (&alg, variant,
2485 vectype, synth_shift_p))
2486 return NULL;
2488 tree accumulator;
2490 /* Clear out the sequence of statements so we can populate it below. */
2491 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2492 gimple *stmt = NULL;
2494 if (cast_to_unsigned_p)
2496 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2497 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2498 append_pattern_def_seq (stmt_vinfo, stmt);
2499 op = tmp_op;
2502 if (alg.op[0] == alg_zero)
2503 accumulator = build_int_cst (multtype, 0);
2504 else
2505 accumulator = op;
2507 bool needs_fixup = (variant == negate_variant)
2508 || (variant == add_variant);
2510 for (int i = 1; i < alg.ops; i++)
2512 tree shft_log = build_int_cst (multtype, alg.log[i]);
2513 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2514 tree tmp_var = NULL_TREE;
2516 switch (alg.op[i])
2518 case alg_shift:
2519 if (synth_shift_p)
2520 stmt
2521 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2522 stmt_vinfo);
2523 else
2524 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2525 shft_log);
2526 break;
2527 case alg_add_t_m2:
2528 tmp_var
2529 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2530 stmt_vinfo, synth_shift_p);
2531 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2532 tmp_var);
2533 break;
2534 case alg_sub_t_m2:
2535 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2536 shft_log, stmt_vinfo,
2537 synth_shift_p);
2538 /* In some algorithms the first step involves zeroing the
2539 accumulator. If subtracting from such an accumulator,
2540 just emit the negation directly. */
2541 if (integer_zerop (accumulator))
2542 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2543 else
2544 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2545 tmp_var);
2546 break;
2547 case alg_add_t2_m:
2548 tmp_var
2549 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2550 stmt_vinfo, synth_shift_p);
2551 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2552 break;
2553 case alg_sub_t2_m:
2554 tmp_var
2555 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2556 stmt_vinfo, synth_shift_p);
2557 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2558 break;
2559 case alg_add_factor:
2560 tmp_var
2561 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2562 stmt_vinfo, synth_shift_p);
2563 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2564 tmp_var);
2565 break;
2566 case alg_sub_factor:
2567 tmp_var
2568 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2569 stmt_vinfo, synth_shift_p);
2570 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2571 accumulator);
2572 break;
2573 default:
2574 gcc_unreachable ();
2576 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2577 but rather return it directly. */
2579 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2580 append_pattern_def_seq (stmt_vinfo, stmt);
2581 accumulator = accum_tmp;
2583 if (variant == negate_variant)
2585 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2586 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2587 accumulator = accum_tmp;
2588 if (cast_to_unsigned_p)
2589 append_pattern_def_seq (stmt_vinfo, stmt);
2591 else if (variant == add_variant)
2593 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2594 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2595 accumulator = accum_tmp;
2596 if (cast_to_unsigned_p)
2597 append_pattern_def_seq (stmt_vinfo, stmt);
2599 /* Move back to a signed type if needed. */
2600 if (cast_to_unsigned_p)
2602 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2603 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2606 return stmt;
2609 /* Detect multiplication by a constant and convert it into a sequence of
2610 shifts, additions, subtractions and negations. We reuse the
2611 choose_mult_variant algorithms from expmed.c.
2613 Input/Output:
2615 STMTS: Contains a stmt from which the pattern search begins,
2616 i.e. the mult stmt.
2618 Output:
2620 * TYPE_OUT: The type of the output of this pattern.
2622 * Return value: A new stmt that will be used to replace
2623 the multiplication. */
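/* Worked illustration (one possible synthesis, chosen here for clarity;
   choose_mult_variant may pick a different sequence): for oprnd1 == 10 the
   pattern could emit

     t1 = x << 2;        (4 * x)
     t2 = t1 + x;        (5 * x)
     y  = t2 << 1;       (10 * x)

   with the shifts themselves synthesized from additions on targets that
   lack vector shifts.  */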
2625 static gimple *
2626 vect_recog_mult_pattern (vec<gimple *> *stmts, tree *type_out)
2628 gimple *last_stmt = stmts->pop ();
2629 tree oprnd0, oprnd1, vectype, itype;
2630 gimple *pattern_stmt;
2631 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2633 if (!is_gimple_assign (last_stmt))
2634 return NULL;
2636 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2637 return NULL;
2639 oprnd0 = gimple_assign_rhs1 (last_stmt);
2640 oprnd1 = gimple_assign_rhs2 (last_stmt);
2641 itype = TREE_TYPE (oprnd0);
2643 if (TREE_CODE (oprnd0) != SSA_NAME
2644 || TREE_CODE (oprnd1) != INTEGER_CST
2645 || !INTEGRAL_TYPE_P (itype)
2646 || !type_has_mode_precision_p (itype))
2647 return NULL;
2649 vectype = get_vectype_for_scalar_type (itype);
2650 if (vectype == NULL_TREE)
2651 return NULL;
2653 /* If the target can handle vectorized multiplication natively,
2654 don't attempt to optimize this. */
2655 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2656 if (mul_optab != unknown_optab)
2658 machine_mode vec_mode = TYPE_MODE (vectype);
2659 int icode = (int) optab_handler (mul_optab, vec_mode);
2660 if (icode != CODE_FOR_nothing)
2661 return NULL;
2664 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2665 if (!pattern_stmt)
2666 return NULL;
2668 /* Pattern detected. */
2669 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2671 stmts->safe_push (last_stmt);
2672 *type_out = vectype;
2674 return pattern_stmt;
2677 /* Detect a signed division by a constant that wouldn't be
2678 otherwise vectorized:
2680 type a_t, b_t;
2682 S1 a_t = b_t / N;
2684 where type 'type' is an integral type and N is a constant.
2686 Similarly handle modulo by a constant:
2688 S4 a_t = b_t % N;
2690 Input/Output:
2692 * STMTS: Contains a stmt from which the pattern search begins,
2693 i.e. the division stmt. S1 is replaced by the following if N is a
2694 power-of-two constant and type is signed:
2695 S3 y_t = b_t < 0 ? N - 1 : 0;
2696 S2 x_t = b_t + y_t;
2697 S1' a_t = x_t >> log2 (N);
2699 S4 is replaced by the following if N is a power-of-two constant and
2700 type is signed (where the *_T temporaries have unsigned type):
2701 S9 y_T = b_t < 0 ? -1U : 0U;
2702 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2703 S7 z_t = (type) z_T;
2704 S6 w_t = b_t + z_t;
2705 S5 x_t = w_t & (N - 1);
2706 S4' a_t = x_t - z_t;
2708 Output:
2710 * TYPE_OUT: The type of the output of this pattern.
2712 * Return value: A new stmt that will be used to replace the division
2713 S1 or modulo S4 stmt. */
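/* Worked illustration for the division case (numbers assumed here, not
   part of the original comment): for a signed type and N == 4,

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   e.g. b_t == -7 gives x_t == -4 and a_t == -1, matching -7 / 4 with
   truncation towards zero.  */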
2715 static gimple *
2716 vect_recog_divmod_pattern (vec<gimple *> *stmts, tree *type_out)
2718 gimple *last_stmt = stmts->pop ();
2719 tree oprnd0, oprnd1, vectype, itype, cond;
2720 gimple *pattern_stmt, *def_stmt;
2721 enum tree_code rhs_code;
2722 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2723 vec_info *vinfo = stmt_vinfo->vinfo;
2724 optab optab;
2725 tree q;
2726 int dummy_int, prec;
2727 stmt_vec_info def_stmt_vinfo;
2729 if (!is_gimple_assign (last_stmt))
2730 return NULL;
2732 rhs_code = gimple_assign_rhs_code (last_stmt);
2733 switch (rhs_code)
2735 case TRUNC_DIV_EXPR:
2736 case TRUNC_MOD_EXPR:
2737 break;
2738 default:
2739 return NULL;
2742 oprnd0 = gimple_assign_rhs1 (last_stmt);
2743 oprnd1 = gimple_assign_rhs2 (last_stmt);
2744 itype = TREE_TYPE (oprnd0);
2745 if (TREE_CODE (oprnd0) != SSA_NAME
2746 || TREE_CODE (oprnd1) != INTEGER_CST
2747 || TREE_CODE (itype) != INTEGER_TYPE
2748 || !type_has_mode_precision_p (itype))
2749 return NULL;
2751 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2752 vectype = get_vectype_for_scalar_type (itype);
2753 if (vectype == NULL_TREE)
2754 return NULL;
2756 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2758 /* If the target can handle vectorized division or modulo natively,
2759 don't attempt to optimize this, since native division is likely
2760 to give smaller code. */
2761 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2762 if (optab != unknown_optab)
2764 machine_mode vec_mode = TYPE_MODE (vectype);
2765 int icode = (int) optab_handler (optab, vec_mode);
2766 if (icode != CODE_FOR_nothing)
2767 return NULL;
2771 prec = TYPE_PRECISION (itype);
2772 if (integer_pow2p (oprnd1))
2774 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2775 return NULL;
2777 /* Pattern detected. */
2778 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2780 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2781 build_int_cst (itype, 0));
2782 if (rhs_code == TRUNC_DIV_EXPR)
2784 tree var = vect_recog_temp_ssa_var (itype, NULL);
2785 tree shift;
2786 def_stmt
2787 = gimple_build_assign (var, COND_EXPR, cond,
2788 fold_build2 (MINUS_EXPR, itype, oprnd1,
2789 build_int_cst (itype, 1)),
2790 build_int_cst (itype, 0));
2791 new_pattern_def_seq (stmt_vinfo, def_stmt);
2792 var = vect_recog_temp_ssa_var (itype, NULL);
2793 def_stmt
2794 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2795 gimple_assign_lhs (def_stmt));
2796 append_pattern_def_seq (stmt_vinfo, def_stmt);
2798 shift = build_int_cst (itype, tree_log2 (oprnd1));
2799 pattern_stmt
2800 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2801 RSHIFT_EXPR, var, shift);
2803 else
2805 tree signmask;
2806 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2807 if (compare_tree_int (oprnd1, 2) == 0)
2809 signmask = vect_recog_temp_ssa_var (itype, NULL);
2810 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2811 build_int_cst (itype, 1),
2812 build_int_cst (itype, 0));
2813 append_pattern_def_seq (stmt_vinfo, def_stmt);
2815 else
2817 tree utype
2818 = build_nonstandard_integer_type (prec, 1);
2819 tree vecutype = get_vectype_for_scalar_type (utype);
2820 tree shift
2821 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2822 - tree_log2 (oprnd1));
2823 tree var = vect_recog_temp_ssa_var (utype, NULL);
2825 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2826 build_int_cst (utype, -1),
2827 build_int_cst (utype, 0));
2828 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2829 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2830 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2831 append_pattern_def_seq (stmt_vinfo, def_stmt);
2832 var = vect_recog_temp_ssa_var (utype, NULL);
2833 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2834 gimple_assign_lhs (def_stmt),
2835 shift);
2836 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2837 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2838 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2839 append_pattern_def_seq (stmt_vinfo, def_stmt);
2840 signmask = vect_recog_temp_ssa_var (itype, NULL);
2841 def_stmt
2842 = gimple_build_assign (signmask, NOP_EXPR, var);
2843 append_pattern_def_seq (stmt_vinfo, def_stmt);
2845 def_stmt
2846 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2847 PLUS_EXPR, oprnd0, signmask);
2848 append_pattern_def_seq (stmt_vinfo, def_stmt);
2849 def_stmt
2850 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2851 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2852 fold_build2 (MINUS_EXPR, itype, oprnd1,
2853 build_int_cst (itype, 1)));
2854 append_pattern_def_seq (stmt_vinfo, def_stmt);
2856 pattern_stmt
2857 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2858 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2859 signmask);
2862 stmts->safe_push (last_stmt);
2864 *type_out = vectype;
2865 return pattern_stmt;
2868 if (prec > HOST_BITS_PER_WIDE_INT
2869 || integer_zerop (oprnd1))
2870 return NULL;
2872 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2873 return NULL;
2875 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2877 if (TYPE_UNSIGNED (itype))
2879 unsigned HOST_WIDE_INT mh, ml;
2880 int pre_shift, post_shift;
2881 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2882 & GET_MODE_MASK (itype_mode));
2883 tree t1, t2, t3, t4;
2885 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2886 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2887 return NULL;
2889 /* Find a suitable multiplier and right shift count
2890 instead of multiplying with D. */
2891 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2893 /* If the suggested multiplier is more than SIZE bits, we can do better
2894 for even divisors, using an initial right shift. */
2895 if (mh != 0 && (d & 1) == 0)
2897 pre_shift = ctz_or_zero (d);
2898 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2899 &ml, &post_shift, &dummy_int);
2900 gcc_assert (!mh);
2902 else
2903 pre_shift = 0;
2905 if (mh != 0)
2907 if (post_shift - 1 >= prec)
2908 return NULL;
2910 /* t1 = oprnd0 h* ml;
2911 t2 = oprnd0 - t1;
2912 t3 = t2 >> 1;
2913 t4 = t1 + t3;
2914 q = t4 >> (post_shift - 1); */
2915 t1 = vect_recog_temp_ssa_var (itype, NULL);
2916 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2917 build_int_cst (itype, ml));
2918 append_pattern_def_seq (stmt_vinfo, def_stmt);
2920 t2 = vect_recog_temp_ssa_var (itype, NULL);
2921 def_stmt
2922 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2923 append_pattern_def_seq (stmt_vinfo, def_stmt);
2925 t3 = vect_recog_temp_ssa_var (itype, NULL);
2926 def_stmt
2927 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2928 append_pattern_def_seq (stmt_vinfo, def_stmt);
2930 t4 = vect_recog_temp_ssa_var (itype, NULL);
2931 def_stmt
2932 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2934 if (post_shift != 1)
2936 append_pattern_def_seq (stmt_vinfo, def_stmt);
2938 q = vect_recog_temp_ssa_var (itype, NULL);
2939 pattern_stmt
2940 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2941 build_int_cst (itype, post_shift - 1));
2943 else
2945 q = t4;
2946 pattern_stmt = def_stmt;
2949 else
2951 if (pre_shift >= prec || post_shift >= prec)
2952 return NULL;
2954 /* t1 = oprnd0 >> pre_shift;
2955 t2 = t1 h* ml;
2956 q = t2 >> post_shift; */
2957 if (pre_shift)
2959 t1 = vect_recog_temp_ssa_var (itype, NULL);
2960 def_stmt
2961 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2962 build_int_cst (NULL, pre_shift));
2963 append_pattern_def_seq (stmt_vinfo, def_stmt);
2965 else
2966 t1 = oprnd0;
2968 t2 = vect_recog_temp_ssa_var (itype, NULL);
2969 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2970 build_int_cst (itype, ml));
2972 if (post_shift)
2974 append_pattern_def_seq (stmt_vinfo, def_stmt);
2976 q = vect_recog_temp_ssa_var (itype, NULL);
2977 def_stmt
2978 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2979 build_int_cst (itype, post_shift));
2981 else
2982 q = t2;
2984 pattern_stmt = def_stmt;
2987 else
2989 unsigned HOST_WIDE_INT ml;
2990 int post_shift;
2991 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2992 unsigned HOST_WIDE_INT abs_d;
2993 bool add = false;
2994 tree t1, t2, t3, t4;
2996 /* Give up for -1. */
2997 if (d == -1)
2998 return NULL;
3000 /* Since d might be INT_MIN, we have to cast to
3001 unsigned HOST_WIDE_INT before negating to avoid
3002 undefined signed overflow. */
3003 abs_d = (d >= 0
3004 ? (unsigned HOST_WIDE_INT) d
3005 : - (unsigned HOST_WIDE_INT) d);
3007 /* n rem d = n rem -d */
3008 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3010 d = abs_d;
3011 oprnd1 = build_int_cst (itype, abs_d);
3013 else if (HOST_BITS_PER_WIDE_INT >= prec
3014 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3015 /* This case is not handled correctly below. */
3016 return NULL;
3018 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3019 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3021 add = true;
3022 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3024 if (post_shift >= prec)
3025 return NULL;
3027 /* t1 = oprnd0 h* ml; */
3028 t1 = vect_recog_temp_ssa_var (itype, NULL);
3029 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3030 build_int_cst (itype, ml));
3032 if (add)
3034 /* t2 = t1 + oprnd0; */
3035 append_pattern_def_seq (stmt_vinfo, def_stmt);
3036 t2 = vect_recog_temp_ssa_var (itype, NULL);
3037 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3039 else
3040 t2 = t1;
3042 if (post_shift)
3044 /* t3 = t2 >> post_shift; */
3045 append_pattern_def_seq (stmt_vinfo, def_stmt);
3046 t3 = vect_recog_temp_ssa_var (itype, NULL);
3047 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3048 build_int_cst (itype, post_shift));
3050 else
3051 t3 = t2;
3053 wide_int oprnd0_min, oprnd0_max;
3054 int msb = 1;
3055 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3057 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3058 msb = 0;
3059 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3060 msb = -1;
3063 if (msb == 0 && d >= 0)
3065 /* q = t3; */
3066 q = t3;
3067 pattern_stmt = def_stmt;
3069 else
3071 /* t4 = oprnd0 >> (prec - 1);
3072 or if we know from VRP that oprnd0 >= 0
3073 t4 = 0;
3074 or if we know from VRP that oprnd0 < 0
3075 t4 = -1; */
3076 append_pattern_def_seq (stmt_vinfo, def_stmt);
3077 t4 = vect_recog_temp_ssa_var (itype, NULL);
3078 if (msb != 1)
3079 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3080 build_int_cst (itype, msb));
3081 else
3082 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3083 build_int_cst (itype, prec - 1));
3084 append_pattern_def_seq (stmt_vinfo, def_stmt);
3086 /* q = t3 - t4; or q = t4 - t3; */
3087 q = vect_recog_temp_ssa_var (itype, NULL);
3088 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3089 d < 0 ? t3 : t4);
3093 if (rhs_code == TRUNC_MOD_EXPR)
3095 tree r, t1;
3097 /* We divided. Now finish by:
3098 t1 = q * oprnd1;
3099 r = oprnd0 - t1; */
3100 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3102 t1 = vect_recog_temp_ssa_var (itype, NULL);
3103 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3104 append_pattern_def_seq (stmt_vinfo, def_stmt);
3106 r = vect_recog_temp_ssa_var (itype, NULL);
3107 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3110 /* Pattern detected. */
3111 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3113 stmts->safe_push (last_stmt);
3115 *type_out = vectype;
3116 return pattern_stmt;
3119 /* Function vect_recog_mixed_size_cond_pattern
3121 Try to find the following pattern:
3123 type x_t, y_t;
3124 TYPE a_T, b_T, c_T;
3125 loop:
3126 S1 a_T = x_t CMP y_t ? b_T : c_T;
3128 where type 'TYPE' is an integral type which has different size
3129 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3130 than 'type', the constants need to fit into an integer type
3131 with the same width as 'type') or results of conversion from 'type'.
3133 Input:
3135 * LAST_STMT: A stmt from which the pattern search begins.
3137 Output:
3139 * TYPE_OUT: The type of the output of this pattern.
3141 * Return value: A new stmt that will be used to replace the pattern.
3142 Additionally a def_stmt is added.
3144 a_it = x_t CMP y_t ? b_it : c_it;
3145 a_T = (TYPE) a_it; */
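/* Illustration (an assumed example, not part of the original comment): for

     unsigned char x_t, y_t;
     int a_T = x_t < y_t ? 5 : 10;

   the comparison is naturally done on 8-bit elements, so the pattern first
   computes an 8-bit a_it = x_t < y_t ? 5 : 10 and then widens it with
   a_T = (int) a_it, keeping the COND_EXPR at the comparison width.  */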
3147 static gimple *
3148 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_out)
3150 gimple *last_stmt = (*stmts)[0];
3151 tree cond_expr, then_clause, else_clause;
3152 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3153 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3154 gimple *pattern_stmt, *def_stmt;
3155 vec_info *vinfo = stmt_vinfo->vinfo;
3156 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3157 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3158 bool promotion;
3159 tree comp_scalar_type;
3161 if (!is_gimple_assign (last_stmt)
3162 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3163 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3164 return NULL;
3166 cond_expr = gimple_assign_rhs1 (last_stmt);
3167 then_clause = gimple_assign_rhs2 (last_stmt);
3168 else_clause = gimple_assign_rhs3 (last_stmt);
3170 if (!COMPARISON_CLASS_P (cond_expr))
3171 return NULL;
3173 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3174 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3175 if (comp_vectype == NULL_TREE)
3176 return NULL;
3178 type = gimple_expr_type (last_stmt);
3179 if (types_compatible_p (type, comp_scalar_type)
3180 || ((TREE_CODE (then_clause) != INTEGER_CST
3181 || TREE_CODE (else_clause) != INTEGER_CST)
3182 && !INTEGRAL_TYPE_P (comp_scalar_type))
3183 || !INTEGRAL_TYPE_P (type))
3184 return NULL;
3186 if ((TREE_CODE (then_clause) != INTEGER_CST
3187 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3188 &def_stmt0, &promotion))
3189 || (TREE_CODE (else_clause) != INTEGER_CST
3190 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3191 &def_stmt1, &promotion)))
3192 return NULL;
3194 if (orig_type0 && orig_type1
3195 && !types_compatible_p (orig_type0, orig_type1))
3196 return NULL;
3198 if (orig_type0)
3200 if (!types_compatible_p (orig_type0, comp_scalar_type))
3201 return NULL;
3202 then_clause = gimple_assign_rhs1 (def_stmt0);
3203 itype = orig_type0;
3206 if (orig_type1)
3208 if (!types_compatible_p (orig_type1, comp_scalar_type))
3209 return NULL;
3210 else_clause = gimple_assign_rhs1 (def_stmt1);
3211 itype = orig_type1;
3215 HOST_WIDE_INT cmp_mode_size
3216 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3218 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3219 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3220 return NULL;
3222 vectype = get_vectype_for_scalar_type (type);
3223 if (vectype == NULL_TREE)
3224 return NULL;
3226 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3227 return NULL;
3229 if (itype == NULL_TREE)
3230 itype = build_nonstandard_integer_type (cmp_mode_size,
3231 TYPE_UNSIGNED (type));
3233 if (itype == NULL_TREE
3234 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3235 return NULL;
3237 vecitype = get_vectype_for_scalar_type (itype);
3238 if (vecitype == NULL_TREE)
3239 return NULL;
3241 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3242 return NULL;
3244 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3246 if ((TREE_CODE (then_clause) == INTEGER_CST
3247 && !int_fits_type_p (then_clause, itype))
3248 || (TREE_CODE (else_clause) == INTEGER_CST
3249 && !int_fits_type_p (else_clause, itype)))
3250 return NULL;
3253 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3254 COND_EXPR, unshare_expr (cond_expr),
3255 fold_convert (itype, then_clause),
3256 fold_convert (itype, else_clause));
3257 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3258 NOP_EXPR, gimple_assign_lhs (def_stmt));
3260 new_pattern_def_seq (stmt_vinfo, def_stmt);
3261 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3262 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3263 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3264 *type_out = vectype;
3266 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3268 return pattern_stmt;
3272 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3273 true if bool VAR can and should be optimized that way. Assume it shouldn't
3274 in case it's a result of a comparison which can be directly vectorized into
3275 a vector comparison. Fills in STMTS with all stmts visited during the
3276 walk. */
3278 static bool
3279 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3281 tree rhs1;
3282 enum tree_code rhs_code;
3284 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3285 if (!def_stmt_info)
3286 return false;
3288 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3289 if (!def_stmt)
3290 return false;
3292 if (stmts.contains (def_stmt))
3293 return true;
3295 rhs1 = gimple_assign_rhs1 (def_stmt);
3296 rhs_code = gimple_assign_rhs_code (def_stmt);
3297 switch (rhs_code)
3299 case SSA_NAME:
3300 if (! check_bool_pattern (rhs1, vinfo, stmts))
3301 return false;
3302 break;
3304 CASE_CONVERT:
3305 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3306 return false;
3307 if (! check_bool_pattern (rhs1, vinfo, stmts))
3308 return false;
3309 break;
3311 case BIT_NOT_EXPR:
3312 if (! check_bool_pattern (rhs1, vinfo, stmts))
3313 return false;
3314 break;
3316 case BIT_AND_EXPR:
3317 case BIT_IOR_EXPR:
3318 case BIT_XOR_EXPR:
3319 if (! check_bool_pattern (rhs1, vinfo, stmts)
3320 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3321 return false;
3322 break;
3324 default:
3325 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3327 tree vecitype, comp_vectype;
3329 /* If the comparison can throw, then is_gimple_condexpr will be
3330 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3331 if (stmt_could_throw_p (def_stmt))
3332 return false;
3334 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3335 if (comp_vectype == NULL_TREE)
3336 return false;
3338 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3339 if (mask_type
3340 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3341 return false;
3343 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3345 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3346 tree itype
3347 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3348 vecitype = get_vectype_for_scalar_type (itype);
3349 if (vecitype == NULL_TREE)
3350 return false;
3352 else
3353 vecitype = comp_vectype;
3354 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3355 return false;
3357 else
3358 return false;
3359 break;
3362 bool res = stmts.add (def_stmt);
3363 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3364 gcc_assert (!res);
3366 return true;
3370 /* Helper function of adjust_bool_pattern. Cast VAR (defined by a previous
3371 stmt, its SSA_NAME_DEF_STMT) to TYPE, appending the cast stmt to
3372 STMT_INFO's pattern def sequence, and return the result of the cast. */
3374 static tree
3375 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3377 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3378 NOP_EXPR, var);
3379 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3380 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3381 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3382 append_pattern_def_seq (stmt_info, cast_stmt);
3383 return gimple_assign_lhs (cast_stmt);
3386 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3387 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3388 type, OUT_TYPE is the desired final integer type of the whole pattern.
3389 STMT_INFO is the info of the pattern root and is where pattern stmts should
3390 be associated with. DEFS is a map of pattern defs. */
3392 static void
3393 adjust_bool_pattern (tree var, tree out_type,
3394 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3396 gimple *stmt = SSA_NAME_DEF_STMT (var);
3397 enum tree_code rhs_code, def_rhs_code;
3398 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3399 location_t loc;
3400 gimple *pattern_stmt, *def_stmt;
3401 tree trueval = NULL_TREE;
3403 rhs1 = gimple_assign_rhs1 (stmt);
3404 rhs2 = gimple_assign_rhs2 (stmt);
3405 rhs_code = gimple_assign_rhs_code (stmt);
3406 loc = gimple_location (stmt);
3407 switch (rhs_code)
3409 case SSA_NAME:
3410 CASE_CONVERT:
3411 irhs1 = *defs.get (rhs1);
3412 itype = TREE_TYPE (irhs1);
3413 pattern_stmt
3414 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3415 SSA_NAME, irhs1);
3416 break;
3418 case BIT_NOT_EXPR:
3419 irhs1 = *defs.get (rhs1);
3420 itype = TREE_TYPE (irhs1);
3421 pattern_stmt
3422 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3423 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3424 break;
3426 case BIT_AND_EXPR:
3427 /* Try to optimize x = y & (a < b ? 1 : 0); into
3428 x = (a < b ? y : 0);
3430 E.g. for:
3431 bool a_b, b_b, c_b;
3432 TYPE d_T;
3434 S1 a_b = x1 CMP1 y1;
3435 S2 b_b = x2 CMP2 y2;
3436 S3 c_b = a_b & b_b;
3437 S4 d_T = (TYPE) c_b;
3439 we would normally emit:
3441 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3442 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3443 S3' c_T = a_T & b_T;
3444 S4' d_T = c_T;
3446 but we can save one stmt by using the
3447 result of one of the COND_EXPRs in the other COND_EXPR and leave
3448 BIT_AND_EXPR stmt out:
3450 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3451 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3452 S4' d_T = c_T;
3454 At least when VEC_COND_EXPR is implemented using masks,
3455 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3456 computes the comparison mask and ANDs it, in one case with an
3457 all-ones vector, in the other case with a vector register.
3458 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3459 often more expensive. */
3460 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3461 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3462 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3464 irhs1 = *defs.get (rhs1);
3465 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3466 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3467 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3469 rhs_code = def_rhs_code;
3470 rhs1 = def_rhs1;
3471 rhs2 = gimple_assign_rhs2 (def_stmt);
3472 trueval = irhs1;
3473 goto do_compare;
3475 else
3476 irhs2 = *defs.get (rhs2);
3477 goto and_ior_xor;
3479 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3480 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3481 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3483 irhs2 = *defs.get (rhs2);
3484 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3485 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3486 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3488 rhs_code = def_rhs_code;
3489 rhs1 = def_rhs1;
3490 rhs2 = gimple_assign_rhs2 (def_stmt);
3491 trueval = irhs2;
3492 goto do_compare;
3494 else
3495 irhs1 = *defs.get (rhs1);
3496 goto and_ior_xor;
3498 /* FALLTHRU */
3499 case BIT_IOR_EXPR:
3500 case BIT_XOR_EXPR:
3501 irhs1 = *defs.get (rhs1);
3502 irhs2 = *defs.get (rhs2);
3503 and_ior_xor:
3504 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3505 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3507 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3508 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3509 int out_prec = TYPE_PRECISION (out_type);
3510 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3511 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3512 stmt_info);
3513 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3514 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3515 stmt_info);
3516 else
3518 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3519 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3522 itype = TREE_TYPE (irhs1);
3523 pattern_stmt
3524 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3525 rhs_code, irhs1, irhs2);
3526 break;
3528 default:
3529 do_compare:
3530 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3531 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3532 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3533 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3534 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3536 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3537 itype
3538 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3540 else
3541 itype = TREE_TYPE (rhs1);
3542 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3543 if (trueval == NULL_TREE)
3544 trueval = build_int_cst (itype, 1);
3545 else
3546 gcc_checking_assert (useless_type_conversion_p (itype,
3547 TREE_TYPE (trueval)));
3548 pattern_stmt
3549 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3550 COND_EXPR, cond_expr, trueval,
3551 build_int_cst (itype, 0));
3552 break;
3555 gimple_set_location (pattern_stmt, loc);
3556 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3557 pattern def seq stmts instead of just letting auto-detection do
3558 its work? */
3559 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3560 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3561 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3562 append_pattern_def_seq (stmt_info, pattern_stmt);
3563 defs.put (var, gimple_assign_lhs (pattern_stmt));
3566 /* Comparison function for qsort, ordering a vector of gimple stmts by UID. */
3568 static int
3569 sort_after_uid (const void *p1, const void *p2)
3571 const gimple *stmt1 = *(const gimple * const *)p1;
3572 const gimple *stmt2 = *(const gimple * const *)p2;
3573 return gimple_uid (stmt1) - gimple_uid (stmt2);
3576 /* Create pattern stmts for all stmts participating in the bool pattern
3577 specified by BOOL_STMT_SET and its root STMT with the desired type
3578 OUT_TYPE. Return the def of the pattern root. */
3580 static tree
3581 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3582 tree out_type, gimple *stmt)
3584 /* Gather original stmts in the bool pattern in their order of appearance
3585 in the IL. */
3586 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3587 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3588 i != bool_stmt_set.end (); ++i)
3589 bool_stmts.quick_push (*i);
3590 bool_stmts.qsort (sort_after_uid);
3592 /* Now process them in that order, producing pattern stmts. */
3593 hash_map <tree, tree> defs;
3594 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3595 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3596 out_type, vinfo_for_stmt (stmt), defs);
3598 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3599 gimple *pattern_stmt
3600 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3601 return gimple_assign_lhs (pattern_stmt);
3604 /* Helper for search_type_for_mask. */
3606 static tree
3607 search_type_for_mask_1 (tree var, vec_info *vinfo,
3608 hash_map<gimple *, tree> &cache)
3610 tree rhs1;
3611 enum tree_code rhs_code;
3612 tree res = NULL_TREE, res2;
3614 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3615 return NULL_TREE;
3617 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3618 if (!def_stmt_info)
3619 return NULL_TREE;
3621 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3622 if (!def_stmt)
3623 return NULL_TREE;
3625 tree *c = cache.get (def_stmt);
3626 if (c)
3627 return *c;
3629 rhs_code = gimple_assign_rhs_code (def_stmt);
3630 rhs1 = gimple_assign_rhs1 (def_stmt);
3632 switch (rhs_code)
3634 case SSA_NAME:
3635 case BIT_NOT_EXPR:
3636 CASE_CONVERT:
3637 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3638 break;
3640 case BIT_AND_EXPR:
3641 case BIT_IOR_EXPR:
3642 case BIT_XOR_EXPR:
3643 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3644 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3645 cache);
3646 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3647 res = res2;
3648 break;
3650 default:
3651 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3653 tree comp_vectype, mask_type;
3655 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3657 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3658 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3659 vinfo, cache);
3660 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3661 res = res2;
3662 break;
3665 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3666 if (comp_vectype == NULL_TREE)
3668 res = NULL_TREE;
3669 break;
3672 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3673 if (!mask_type
3674 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3676 res = NULL_TREE;
3677 break;
3680 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3681 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3683 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3684 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3686 else
3687 res = TREE_TYPE (rhs1);
3691 cache.put (def_stmt, res);
3692 return res;
3695 /* Return the proper type for converting bool VAR into
3696 an integer value or NULL_TREE if no such type exists.
3697 The type is chosen so that the converted value has the
3698 same number of elements as VAR's vector type. */
3700 static tree
3701 search_type_for_mask (tree var, vec_info *vinfo)
3703 hash_map<gimple *, tree> cache;
3704 return search_type_for_mask_1 (var, vinfo, cache);
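/* For illustration, with names chosen only for this example: given

     bool m_1 = i_2 > i_3;    // int comparison
     bool m_4 = c_5 < c_6;    // char comparison
     bool m_7 = m_1 & m_4;

   search_type_for_mask (m_7, ...) would return the 8-bit unsigned type:
   each comparison contributes the unsigned type matching its operand
   mode, and for the bitwise operation the narrower of the two results
   wins.  */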
3707 /* Function vect_recog_bool_pattern
3709 Try to find pattern like following:
3711 bool a_b, b_b, c_b, d_b, e_b;
3712 TYPE f_T;
3713 loop:
3714 S1 a_b = x1 CMP1 y1;
3715 S2 b_b = x2 CMP2 y2;
3716 S3 c_b = a_b & b_b;
3717 S4 d_b = x3 CMP3 y3;
3718 S5 e_b = c_b | d_b;
3719 S6 f_T = (TYPE) e_b;
3721 where type 'TYPE' is an integral type. Or a similar pattern
3722 ending in
3724 S6 f_Y = e_b ? r_Y : s_Y;
3726 as results from if-conversion of a complex condition.
3728 Input:
3730 * LAST_STMT: A stmt at the end from which the pattern
3731 search begins, i.e. cast of a bool to
3732 an integer type.
3734 Output:
3736 * TYPE_OUT: The type of the output of this pattern.
3738 * Return value: A new stmt that will be used to replace the pattern.
3740 Assuming the size of TYPE is the same as the size of all comparisons
3741 (otherwise some casts would be added where needed), for the above
3742 sequence we create related pattern stmts:
3743 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3744 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3745 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3746 S5' e_T = c_T | d_T;
3747 S6' f_T = e_T;
3749 Instead of the above S3' we could emit:
3750 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3751 S3' c_T = a_T | b_T;
3752 but the above is more efficient. */
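/* For illustration, a scalar loop of roughly the following shape is the
   kind of input this recognizer handles; the array names and element
   types are chosen only for the example:

     void
     f (int *restrict out, int *restrict x1, int *restrict y1,
        int *restrict x2, int *restrict y2, int n)
     {
       for (int i = 0; i < n; ++i)
         out[i] = (x1[i] < y1[i]) & (x2[i] > y2[i]);
     }

   The comparisons and the bitwise AND produce boolean SSA names which are
   then converted to int for the store, matching the S1-S6 shape above;
   the pattern replaces them with the COND_EXPR-based sequence shown
   above.  */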
3754 static gimple *
3755 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_out)
3757 gimple *last_stmt = stmts->pop ();
3758 enum tree_code rhs_code;
3759 tree var, lhs, rhs, vectype;
3760 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3761 stmt_vec_info new_stmt_info;
3762 vec_info *vinfo = stmt_vinfo->vinfo;
3763 gimple *pattern_stmt;
3765 if (!is_gimple_assign (last_stmt))
3766 return NULL;
3768 var = gimple_assign_rhs1 (last_stmt);
3769 lhs = gimple_assign_lhs (last_stmt);
3771 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3772 return NULL;
3774 hash_set<gimple *> bool_stmts;
3776 rhs_code = gimple_assign_rhs_code (last_stmt);
3777 if (CONVERT_EXPR_CODE_P (rhs_code))
3779 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3780 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3781 return NULL;
3782 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3783 if (vectype == NULL_TREE)
3784 return NULL;
3786 if (check_bool_pattern (var, vinfo, bool_stmts))
3788 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3789 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3790 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3791 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3792 else
3793 pattern_stmt
3794 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3796 else
3798 tree type = search_type_for_mask (var, vinfo);
3799 tree cst0, cst1, tmp;
3801 if (!type)
3802 return NULL;
3804 /* We may use the cond directly with a narrowed type, avoiding
3805 multiple cond exprs and the subsequent result packing, and
3806 performing a single cond with a packed mask instead. In the
3807 widening case it is better to do the cond first and then
3808 extract the results. */
3809 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3810 type = TREE_TYPE (lhs);
3812 cst0 = build_int_cst (type, 0);
3813 cst1 = build_int_cst (type, 1);
3814 tmp = vect_recog_temp_ssa_var (type, NULL);
3815 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3817 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3819 tree new_vectype = get_vectype_for_scalar_type (type);
3820 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3821 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3822 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3823 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3825 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3826 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3830 *type_out = vectype;
3831 stmts->safe_push (last_stmt);
3832 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3834 return pattern_stmt;
3836 else if (rhs_code == COND_EXPR
3837 && TREE_CODE (var) == SSA_NAME)
3839 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3840 if (vectype == NULL_TREE)
3841 return NULL;
3843 /* Build a scalar type for the boolean result that when
3844 vectorized matches the vector type of the result in
3845 size and number of elements. */
3846 unsigned prec
3847 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3848 TYPE_VECTOR_SUBPARTS (vectype));
3850 tree type
3851 = build_nonstandard_integer_type (prec,
3852 TYPE_UNSIGNED (TREE_TYPE (var)));
3853 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3854 return NULL;
3856 if (!check_bool_pattern (var, vinfo, bool_stmts))
3857 return NULL;
3859 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3861 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3862 pattern_stmt
3863 = gimple_build_assign (lhs, COND_EXPR,
3864 build2 (NE_EXPR, boolean_type_node,
3865 rhs, build_int_cst (type, 0)),
3866 gimple_assign_rhs2 (last_stmt),
3867 gimple_assign_rhs3 (last_stmt));
3868 *type_out = vectype;
3869 stmts->safe_push (last_stmt);
3870 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3872 return pattern_stmt;
3874 else if (rhs_code == SSA_NAME
3875 && STMT_VINFO_DATA_REF (stmt_vinfo))
3877 stmt_vec_info pattern_stmt_info;
3878 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3879 gcc_assert (vectype != NULL_TREE);
3880 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3881 return NULL;
3883 if (check_bool_pattern (var, vinfo, bool_stmts))
3884 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3885 else
3887 tree type = search_type_for_mask (var, vinfo);
3888 tree cst0, cst1, new_vectype;
3890 if (!type)
3891 return NULL;
3893 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3894 type = TREE_TYPE (vectype);
3896 cst0 = build_int_cst (type, 0);
3897 cst1 = build_int_cst (type, 1);
3898 new_vectype = get_vectype_for_scalar_type (type);
3900 rhs = vect_recog_temp_ssa_var (type, NULL);
3901 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3903 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3904 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3905 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3906 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3909 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3910 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3912 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3913 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3914 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3915 rhs = rhs2;
3917 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3918 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3919 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3920 STMT_VINFO_DATA_REF (pattern_stmt_info)
3921 = STMT_VINFO_DATA_REF (stmt_vinfo);
3922 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3923 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3924 *type_out = vectype;
3925 stmts->safe_push (last_stmt);
3926 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3928 return pattern_stmt;
3930 else
3931 return NULL;
3935 /* A helper for vect_recog_mask_conversion_pattern. Build a
3936 conversion of MASK to a type suitable for masking VECTYPE.
3937 The built statement gets the required vectype and is appended
3938 to STMT_VINFO's pattern sequence.
3940 Return the converted mask. */
3942 static tree
3943 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3944 vec_info *vinfo)
3946 gimple *stmt;
3947 tree masktype, tmp;
3948 stmt_vec_info new_stmt_info;
3950 masktype = build_same_sized_truth_vector_type (vectype);
3951 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3952 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3953 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3954 set_vinfo_for_stmt (stmt, new_stmt_info);
3955 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3956 append_pattern_def_seq (stmt_vinfo, stmt);
3958 return tmp;
3962 /* Function vect_recog_mask_conversion_pattern
3964 Try to find statements which require boolean type
3965 conversion. Additional conversion statements are
3966 added to handle such cases. For example:
3968 bool m_1, m_2, m_3;
3969 int i_4, i_5;
3970 double d_6, d_7;
3971 char c_1, c_2, c_3;
3973 S1 m_1 = i_4 > i_5;
3974 S2 m_2 = d_6 < d_7;
3975 S3 m_3 = m_1 & m_2;
3976 S4 c_1 = m_3 ? c_2 : c_3;
3978 Will be transformed into:
3980 S1 m_1 = i_4 > i_5;
3981 S2 m_2 = d_6 < d_7;
3982 S3'' m_2' = (_Bool[bitsize=32])m_2
3983 S3' m_3' = m_1 & m_2';
3984 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3985 S4' c_1' = m_3'' ? c_2 : c_3; */
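/* For illustration, source of roughly the following shape (names chosen
   only for the example) leads to the situation above:

     void
     f (char *restrict c1, char *restrict c2, char *restrict c3,
        int *restrict i4, int *restrict i5,
        double *restrict d6, double *restrict d7, int n)
     {
       for (int i = 0; i < n; ++i)
         c1[i] = (i4[i] > i5[i] && d6[i] < d7[i]) ? c2[i] : c3[i];
     }

   The int comparison produces a mask sized for 4-byte elements, the
   double comparison a mask sized for 8-byte elements, and the selected
   values are chars, so conversions between the mask types are needed.  */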
3987 static gimple *
3988 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_out)
3990 gimple *last_stmt = stmts->pop ();
3991 enum tree_code rhs_code;
3992 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3993 tree vectype1, vectype2;
3994 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3995 stmt_vec_info pattern_stmt_info;
3996 vec_info *vinfo = stmt_vinfo->vinfo;
3998 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3999 if (is_gimple_call (last_stmt)
4000 && gimple_call_internal_p (last_stmt)
4001 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
4002 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
4004 gcall *pattern_stmt;
4005 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
4007 if (load)
4009 lhs = gimple_call_lhs (last_stmt);
4010 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4012 else
4014 rhs2 = gimple_call_arg (last_stmt, 3);
4015 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
4018 rhs1 = gimple_call_arg (last_stmt, 2);
4019 rhs1_type = search_type_for_mask (rhs1, vinfo);
4020 if (!rhs1_type)
4021 return NULL;
4022 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4024 if (!vectype1 || !vectype2
4025 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4026 TYPE_VECTOR_SUBPARTS (vectype2)))
4027 return NULL;
4029 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4031 if (load)
4033 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4034 pattern_stmt
4035 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
4036 gimple_call_arg (last_stmt, 0),
4037 gimple_call_arg (last_stmt, 1),
4038 tmp);
4039 gimple_call_set_lhs (pattern_stmt, lhs);
4041 else
4042 pattern_stmt
4043 = gimple_build_call_internal (IFN_MASK_STORE, 4,
4044 gimple_call_arg (last_stmt, 0),
4045 gimple_call_arg (last_stmt, 1),
4046 tmp,
4047 gimple_call_arg (last_stmt, 3));
4049 gimple_call_set_nothrow (pattern_stmt, true);
4051 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4052 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4053 STMT_VINFO_DATA_REF (pattern_stmt_info)
4054 = STMT_VINFO_DATA_REF (stmt_vinfo);
4055 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4056 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4057 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4058 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
4060 *type_out = vectype1;
4061 stmts->safe_push (last_stmt);
4062 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4064 return pattern_stmt;
4067 if (!is_gimple_assign (last_stmt))
4068 return NULL;
4070 gimple *pattern_stmt;
4071 lhs = gimple_assign_lhs (last_stmt);
4072 rhs1 = gimple_assign_rhs1 (last_stmt);
4073 rhs_code = gimple_assign_rhs_code (last_stmt);
4075 /* Check for cond expression requiring mask conversion. */
4076 if (rhs_code == COND_EXPR)
4078 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4080 if (TREE_CODE (rhs1) == SSA_NAME)
4082 rhs1_type = search_type_for_mask (rhs1, vinfo);
4083 if (!rhs1_type)
4084 return NULL;
4086 else if (COMPARISON_CLASS_P (rhs1))
4088 /* Check whether we're comparing scalar booleans and (if so)
4089 whether a better mask type exists than the mask associated
4090 with boolean-sized elements. This avoids unnecessary packs
4091 and unpacks if the booleans are set from comparisons of
4092 wider types. E.g. in:
4094 int x1, x2, x3, x4, y1, y2;
4096 bool b1 = (x1 == x2);
4097 bool b2 = (x3 == x4);
4098 ... = b1 == b2 ? y1 : y2;
4100 it is better for b1 and b2 to use the mask type associated
4101 with int elements rather than bool (byte) elements. */
4102 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4103 if (!rhs1_type)
4104 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4106 else
4107 return NULL;
4109 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4111 if (!vectype1 || !vectype2)
4112 return NULL;
4114 /* Continue if a conversion is needed. Also continue if we have
4115 a comparison whose vector type would normally be different from
4116 VECTYPE2 when considered in isolation. In that case we'll
4117 replace the comparison with an SSA name (so that we can record
4118 its vector type) and behave as though the comparison was an SSA
4119 name from the outset. */
4120 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4121 TYPE_VECTOR_SUBPARTS (vectype2))
4122 && (TREE_CODE (rhs1) == SSA_NAME
4123 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4124 return NULL;
4126 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4127 in place; we can handle it in vectorizable_condition. This avoids
4128 unnecessary promotion stmts and increased vectorization factor. */
4129 if (COMPARISON_CLASS_P (rhs1)
4130 && INTEGRAL_TYPE_P (rhs1_type)
4131 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4132 TYPE_VECTOR_SUBPARTS (vectype2)))
4134 enum vect_def_type dt;
4135 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4136 && dt == vect_external_def
4137 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4138 && (dt == vect_external_def
4139 || dt == vect_constant_def))
4141 tree wide_scalar_type = build_nonstandard_integer_type
4142 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4143 TYPE_UNSIGNED (rhs1_type));
4144 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4145 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4146 return NULL;
4150 /* If rhs1 is a comparison we need to move it into a
4151 separate statement. */
4152 if (TREE_CODE (rhs1) != SSA_NAME)
4154 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4155 pattern_stmt = gimple_build_assign (tmp, rhs1);
4156 rhs1 = tmp;
4158 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4159 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4160 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4161 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4164 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4165 TYPE_VECTOR_SUBPARTS (vectype2)))
4166 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4167 else
4168 tmp = rhs1;
4170 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4171 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4172 gimple_assign_rhs2 (last_stmt),
4173 gimple_assign_rhs3 (last_stmt));
4175 *type_out = vectype1;
4176 stmts->safe_push (last_stmt);
4177 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4179 return pattern_stmt;
4182 /* Now check for binary boolean operations requiring conversion for
4183 one of the operands. */
4184 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4185 return NULL;
4187 if (rhs_code != BIT_IOR_EXPR
4188 && rhs_code != BIT_XOR_EXPR
4189 && rhs_code != BIT_AND_EXPR
4190 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4191 return NULL;
4193 rhs2 = gimple_assign_rhs2 (last_stmt);
4195 rhs1_type = search_type_for_mask (rhs1, vinfo);
4196 rhs2_type = search_type_for_mask (rhs2, vinfo);
4198 if (!rhs1_type || !rhs2_type
4199 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4200 return NULL;
4202 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4204 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4205 if (!vectype1)
4206 return NULL;
4207 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4209 else
4211 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4212 if (!vectype1)
4213 return NULL;
4214 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4217 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4218 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4220 *type_out = vectype1;
4221 stmts->safe_push (last_stmt);
4222 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4224 return pattern_stmt;
4227 /* STMT is a load or store. If the load or store is conditional, return
4228 the boolean condition under which it occurs, otherwise return null. */
4230 static tree
4231 vect_get_load_store_mask (gimple *stmt)
4233 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4235 gcc_assert (gimple_assign_single_p (def_assign));
4236 return NULL_TREE;
4239 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4241 internal_fn ifn = gimple_call_internal_fn (def_call);
4242 int mask_index = internal_fn_mask_index (ifn);
4243 return gimple_call_arg (def_call, mask_index);
4246 gcc_unreachable ();
4249 /* Return the scalar offset type that an internal gather/scatter function
4250 should use. GS_INFO describes the gather/scatter operation. */
4252 static tree
4253 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4255 tree offset_type = TREE_TYPE (gs_info->offset);
4256 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4258 /* Enforced by vect_check_gather_scatter. */
4259 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4260 gcc_assert (element_bits >= offset_bits);
4262 /* If the offset is narrower than the elements, extend it according
4263 to its sign. */
4264 if (element_bits > offset_bits)
4265 return build_nonstandard_integer_type (element_bits,
4266 TYPE_UNSIGNED (offset_type));
4268 return offset_type;
4271 /* Return MASK if MASK is suitable for masking an operation on vectors
4272 of type VECTYPE, otherwise convert it into such a form and return
4273 the result. Associate any conversion statements with STMT_INFO's
4274 pattern. */
4276 static tree
4277 vect_convert_mask_for_vectype (tree mask, tree vectype,
4278 stmt_vec_info stmt_info, vec_info *vinfo)
4280 tree mask_type = search_type_for_mask (mask, vinfo);
4281 if (mask_type)
4283 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4284 if (mask_vectype
4285 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4286 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4287 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4289 return mask;
4292 /* Return the equivalent of:
4294 fold_convert (TYPE, VALUE)
4296 with the expectation that the operation will be vectorized.
4297 If new statements are needed, add them as pattern statements
4298 to STMT_INFO. */
4300 static tree
4301 vect_add_conversion_to_patterm (tree type, tree value,
4302 stmt_vec_info stmt_info,
4303 vec_info *vinfo)
4305 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4306 return value;
4308 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4309 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4310 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4311 set_vinfo_for_stmt (conversion, new_stmt_info);
4312 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4313 append_pattern_def_seq (stmt_info, conversion);
4314 return new_value;
4317 /* Try to convert STMT into a call to a gather load or scatter store
4318 internal function. Return the final statement on success and set
4319 *TYPE_OUT to the vector type being loaded or stored.
4321 This function only handles gathers and scatters that were recognized
4322 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
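/* For illustration, a loop such as the following (names chosen only for
   the example) is a candidate, assuming the target provides gather-load
   internal functions for this mode:

     void
     f (double *restrict dst, double *restrict src,
        int *restrict idx, int n)
     {
       for (int i = 0; i < n; ++i)
         dst[i] = src[idx[i]];
     }

   The load from src[idx[i]] has no constant step and is therefore marked
   STMT_VINFO_GATHER_SCATTER_P during data-reference analysis; the code
   below then replaces it with a call to the internal function, after
   widening the offset and converting any controlling mask.  */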
4324 static gimple *
4325 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4326 tree *type_out)
4328 /* Currently we only support this for loop vectorization. */
4329 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4330 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4331 if (!loop_vinfo)
4332 return NULL;
4334 /* Make sure that we're looking at a gather load or scatter store. */
4335 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4336 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4337 return NULL;
4339 /* Get the boolean that controls whether the load or store happens.
4340 This is null if the operation is unconditional. */
4341 tree mask = vect_get_load_store_mask (stmt);
4343 /* Make sure that the target supports an appropriate internal
4344 function for the gather/scatter operation. */
4345 gather_scatter_info gs_info;
4346 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4347 || gs_info.decl)
4348 return NULL;
4350 /* Convert the mask to the right form. */
4351 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4352 if (mask)
4353 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4354 loop_vinfo);
4356 /* Get the invariant base and non-invariant offset, converting the
4357 latter to the same width as the vector elements. */
4358 tree base = gs_info.base;
4359 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4360 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4361 last_stmt_info, loop_vinfo);
4363 /* Build the new pattern statement. */
4364 tree scale = size_int (gs_info.scale);
4365 gcall *pattern_stmt;
4366 if (DR_IS_READ (dr))
4368 if (mask != NULL)
4369 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4370 offset, scale, mask);
4371 else
4372 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4373 offset, scale);
4374 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4375 gimple_call_set_lhs (pattern_stmt, load_lhs);
4377 else
4379 tree rhs = vect_get_store_rhs (stmt);
4380 if (mask != NULL)
4381 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4382 base, offset, scale, rhs,
4383 mask);
4384 else
4385 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4386 base, offset, scale, rhs);
4388 gimple_call_set_nothrow (pattern_stmt, true);
4390 /* Copy across relevant vectorization info and associate DR with the
4391 new pattern statement instead of the original statement. */
4392 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4393 loop_vinfo);
4394 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4395 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4396 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4397 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4398 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4399 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4401 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4402 *type_out = vectype;
4403 vect_pattern_detected ("gather/scatter pattern", stmt);
4405 return pattern_stmt;
4408 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4410 static gimple *
4411 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_out)
4413 gimple *last_stmt = stmts->pop ();
4414 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4415 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4416 last_stmt_info,
4417 type_out);
4418 if (pattern_stmt)
4419 stmts->safe_push (last_stmt);
4420 return pattern_stmt;
4423 /* Return true if TYPE is a non-boolean integer type. These are the types
4424 that we want to consider for narrowing. */
4426 static bool
4427 vect_narrowable_type_p (tree type)
4429 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4432 /* Return true if the operation given by CODE can be truncated to N bits
4433 when only N bits of the output are needed. This is only true if bit N+1
4434 of the inputs has no effect on the low N bits of the result. */
4436 static bool
4437 vect_truncatable_operation_p (tree_code code)
4439 switch (code)
4441 case PLUS_EXPR:
4442 case MINUS_EXPR:
4443 case MULT_EXPR:
4444 case BIT_AND_EXPR:
4445 case BIT_IOR_EXPR:
4446 case BIT_XOR_EXPR:
4447 case COND_EXPR:
4448 return true;
4450 default:
4451 return false;
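/* For illustration: the property above is what makes

     unsigned char r = (unsigned char) (a + b);

   computable entirely in 8 bits even when A and B are ints, whereas

     unsigned char r = (unsigned char) ((a + b) >> 1);

   cannot be truncated to 8 bits, because bit 8 of the sum (which depends
   on bit 8 of the inputs as well as on the carry out of the low bits)
   feeds bit 7 of the shifted result.  */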
4455 /* Record that STMT_INFO could be changed from operating on TYPE to
4456 operating on a type with the precision and sign given by PRECISION
4457 and SIGN respectively. PRECISION is an arbitrary bit precision;
4458 it might not be a whole number of bytes. */
4460 static void
4461 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4462 unsigned int precision, signop sign)
4464 /* Round the precision up to a whole number of bytes. */
4465 precision = vect_element_precision (precision);
4466 if (precision < TYPE_PRECISION (type)
4467 && (!stmt_info->operation_precision
4468 || stmt_info->operation_precision > precision))
4470 stmt_info->operation_precision = precision;
4471 stmt_info->operation_sign = sign;
4475 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4476 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4477 is an arbitrary bit precision; it might not be a whole number of bytes. */
4479 static void
4480 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4481 unsigned int min_input_precision)
4483 /* This operation in isolation only requires the inputs to have
4484 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4485 that MIN_INPUT_PRECISION is a natural precision for the chain
4486 as a whole. E.g. consider something like:
4488 unsigned short *x, *y;
4489 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4491 The right shift can be done on unsigned chars, and only requires the
4492 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4493 approach would mean turning a natural chain of single-vector unsigned
4494 short operations into one that truncates "*x" and then extends
4495 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4496 operation and one vector for each unsigned char operation.
4497 This would be a significant pessimization.
4499 Instead only propagate the maximum of this precision and the precision
4500 required by the users of the result. This means that we don't pessimize
4501 the case above but continue to optimize things like:
4503 unsigned char *y;
4504 unsigned short *x;
4505 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4507 Here we would truncate two vectors of *x to a single vector of
4508 unsigned chars and use single-vector unsigned char operations for
4509 everything else, rather than doing two unsigned short copies of
4510 "(*x & 0xf0) >> 4" and then truncating the result. */
4511 min_input_precision = MAX (min_input_precision,
4512 stmt_info->min_output_precision);
4514 if (min_input_precision < TYPE_PRECISION (type)
4515 && (!stmt_info->min_input_precision
4516 || stmt_info->min_input_precision > min_input_precision))
4517 stmt_info->min_input_precision = min_input_precision;
4520 /* Subroutine of vect_determine_min_output_precision. Return true if
4521 we can calculate a reduced number of output bits for STMT_INFO,
4522 whose result is LHS. */
4524 static bool
4525 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4527 /* Take the maximum precision required by users of the result. */
4528 unsigned int precision = 0;
4529 imm_use_iterator iter;
4530 use_operand_p use;
4531 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4533 gimple *use_stmt = USE_STMT (use);
4534 if (is_gimple_debug (use_stmt))
4535 continue;
4536 if (!vect_stmt_in_region_p (stmt_info->vinfo, use_stmt))
4537 return false;
4538 stmt_vec_info use_stmt_info = vinfo_for_stmt (use_stmt);
4539 if (!use_stmt_info->min_input_precision)
4540 return false;
4541 precision = MAX (precision, use_stmt_info->min_input_precision);
4544 if (dump_enabled_p ())
4546 dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
4547 precision);
4548 dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
4549 dump_printf (MSG_NOTE, " are significant\n");
4551 stmt_info->min_output_precision = precision;
4552 return true;
4555 /* Calculate min_output_precision for STMT_INFO. */
4557 static void
4558 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4560 /* We're only interested in statements with a narrowable result. */
4561 tree lhs = gimple_get_lhs (stmt_info->stmt);
4562 if (!lhs
4563 || TREE_CODE (lhs) != SSA_NAME
4564 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4565 return;
4567 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4568 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4571 /* Use range information to decide whether STMT (described by STMT_INFO)
4572 could be done in a narrower type. This is effectively a forward
4573 propagation, since it uses context-independent information that applies
4574 to all users of an SSA name. */
4576 static void
4577 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4579 tree lhs = gimple_assign_lhs (stmt);
4580 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4581 return;
4583 tree type = TREE_TYPE (lhs);
4584 if (!vect_narrowable_type_p (type))
4585 return;
4587 /* First see whether we have any useful range information for the result. */
4588 unsigned int precision = TYPE_PRECISION (type);
4589 signop sign = TYPE_SIGN (type);
4590 wide_int min_value, max_value;
4591 if (!vect_get_range_info (lhs, &min_value, &max_value))
4592 return;
4594 tree_code code = gimple_assign_rhs_code (stmt);
4595 unsigned int nops = gimple_num_ops (stmt);
4597 if (!vect_truncatable_operation_p (code))
4598 /* Check that all relevant input operands are compatible, and update
4599 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4600 for (unsigned int i = 1; i < nops; ++i)
4602 tree op = gimple_op (stmt, i);
4603 if (TREE_CODE (op) == INTEGER_CST)
4605 /* Don't require the integer to have TYPE (which it might
4606 not for things like shift amounts, etc.), but do require it
4607 to fit the type. */
4608 if (!int_fits_type_p (op, type))
4609 return;
4611 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4612 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4614 else if (TREE_CODE (op) == SSA_NAME)
4616 /* Ignore codes that don't take uniform arguments. */
4617 if (!types_compatible_p (TREE_TYPE (op), type))
4618 return;
4620 wide_int op_min_value, op_max_value;
4621 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4622 return;
4624 min_value = wi::min (min_value, op_min_value, sign);
4625 max_value = wi::max (max_value, op_max_value, sign);
4627 else
4628 return;
4631 /* Try to switch signed types for unsigned types if we can.
4632 This is better for two reasons. First, unsigned ops tend
4633 to be cheaper than signed ops. Second, it means that we can
4634 handle things like:
4636 signed char c;
4637 int res = (int) c & 0xff00; // range [0x0000, 0xff00]

   into:
4641 signed char c;
4642 unsigned short res_1 = (unsigned short) c & 0xff00;
4643 int res = (int) res_1;
4645 where the intermediate result res_1 has unsigned rather than
4646 signed type. */
4647 if (sign == SIGNED && !wi::neg_p (min_value))
4648 sign = UNSIGNED;
4650 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4651 unsigned int precision1 = wi::min_precision (min_value, sign);
4652 unsigned int precision2 = wi::min_precision (max_value, sign);
4653 unsigned int value_precision = MAX (precision1, precision2);
4654 if (value_precision >= precision)
4655 return;
4657 if (dump_enabled_p ())
4659 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4660 " without loss of precision: ",
4661 sign == SIGNED ? "signed" : "unsigned",
4662 value_precision);
4663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4666 vect_set_operation_type (stmt_info, type, value_precision, sign);
4667 vect_set_min_input_precision (stmt_info, type, value_precision);
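/* For illustration, range information enables narrowing in cases like
   the following (names chosen only for the example):

     void
     f (unsigned char *restrict r, unsigned char *restrict a,
        unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = (a[i] + b[i] + 1) >> 1;
     }

   The additions are performed in int after the usual promotions, but the
   value range of a[i] + b[i] + 1 is known to be [1, 511], which needs
   only 9 bits, so the additions can be done in 16 bits instead; the
   separate average pattern can then recognize the halving of the
   narrowed sum.  */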
4670 /* Use information about the users of STMT's result to decide whether
4671 STMT (described by STMT_INFO) could be done in a narrower type.
4672 This is effectively a backward propagation. */
4674 static void
4675 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4677 tree_code code = gimple_assign_rhs_code (stmt);
4678 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4679 tree type = TREE_TYPE (gimple_op (stmt, opno));
4680 if (!vect_narrowable_type_p (type))
4681 return;
4683 unsigned int precision = TYPE_PRECISION (type);
4684 unsigned int operation_precision, min_input_precision;
4685 switch (code)
4687 CASE_CONVERT:
4688 /* Only the bits that contribute to the output matter. Don't change
4689 the precision of the operation itself. */
4690 operation_precision = precision;
4691 min_input_precision = stmt_info->min_output_precision;
4692 break;
4694 case LSHIFT_EXPR:
4695 case RSHIFT_EXPR:
4697 tree shift = gimple_assign_rhs2 (stmt);
4698 if (TREE_CODE (shift) != INTEGER_CST
4699 || !wi::ltu_p (wi::to_widest (shift), precision))
4700 return;
4701 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4702 if (code == LSHIFT_EXPR)
4704 /* We need CONST_SHIFT fewer bits of the input. */
4705 operation_precision = stmt_info->min_output_precision;
4706 min_input_precision = (MAX (operation_precision, const_shift)
4707 - const_shift);
4709 else
4711 /* We need CONST_SHIFT extra bits to do the operation. */
4712 operation_precision = (stmt_info->min_output_precision
4713 + const_shift);
4714 min_input_precision = operation_precision;
4716 break;
4719 default:
4720 if (vect_truncatable_operation_p (code))
4722 /* Input bit N has no effect on output bits N-1 and lower. */
4723 operation_precision = stmt_info->min_output_precision;
4724 min_input_precision = operation_precision;
4725 break;
4727 return;
4730 if (operation_precision < precision)
4732 if (dump_enabled_p ())
4734 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4735 " without affecting users: ",
4736 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4737 operation_precision);
4738 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4740 vect_set_operation_type (stmt_info, type, operation_precision,
4741 TYPE_SIGN (type));
4743 vect_set_min_input_precision (stmt_info, type, min_input_precision);
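/* For illustration: given

     unsigned short t = x << 3;

   whose users only need the low 8 bits of T, the code above records that
   the shift itself can be done in 8 bits and that only the low 5 bits of
   X matter.  For

     unsigned short t = x >> 3;

   under the same condition, 8 + 3 = 11 bits of X are needed, because the
   shifted-out bits feed the low 8 bits of the result, so the operation
   stays 16 bits wide after rounding to a whole number of bytes.  */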
4746 /* Handle vect_determine_precisions for STMT_INFO, given that we
4747 have already done so for the users of its result. */
4749 void
4750 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4752 vect_determine_min_output_precision (stmt_info);
4753 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4755 vect_determine_precisions_from_range (stmt_info, stmt);
4756 vect_determine_precisions_from_users (stmt_info, stmt);
4760 /* Walk backwards through the vectorizable region to determine the
4761 values of these fields:
4763 - min_output_precision
4764 - min_input_precision
4765 - operation_precision
4766 - operation_sign. */
4768 void
4769 vect_determine_precisions (vec_info *vinfo)
4771 DUMP_VECT_SCOPE ("vect_determine_precisions");
4773 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4775 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4776 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4777 unsigned int nbbs = loop->num_nodes;
4779 for (unsigned int i = 0; i < nbbs; i++)
4781 basic_block bb = bbs[nbbs - i - 1];
4782 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4783 !gsi_end_p (si); gsi_prev (&si))
4784 vect_determine_stmt_precisions (vinfo_for_stmt (gsi_stmt (si)));
4787 else
4789 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4790 gimple_stmt_iterator si = bb_vinfo->region_end;
4791 gimple *stmt;
4794 if (!gsi_stmt (si))
4795 si = gsi_last_bb (bb_vinfo->bb);
4796 else
4797 gsi_prev (&si);
4798 stmt = gsi_stmt (si);
4799 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4800 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4801 vect_determine_stmt_precisions (stmt_info);
4803 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4807 typedef gimple *(*vect_recog_func_ptr) (vec<gimple *> *, tree *);
4809 struct vect_recog_func
4811 vect_recog_func_ptr fn;
4812 const char *name;
4815 /* Note that ordering matters - the first pattern matching on a stmt is
4816 taken, which means the more complex one usually needs to precede the
4817 less complex ones (widen_sum only after dot_prod or sad for example). */
4818 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4819 { vect_recog_over_widening_pattern, "over_widening" },
4820 /* Must come after over_widening, which narrows the shift as much as
4821 possible beforehand. */
4822 { vect_recog_average_pattern, "average" },
4823 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4824 { vect_recog_widen_mult_pattern, "widen_mult" },
4825 { vect_recog_dot_prod_pattern, "dot_prod" },
4826 { vect_recog_sad_pattern, "sad" },
4827 { vect_recog_widen_sum_pattern, "widen_sum" },
4828 { vect_recog_pow_pattern, "pow" },
4829 { vect_recog_widen_shift_pattern, "widen_shift" },
4830 { vect_recog_rotate_pattern, "rotate" },
4831 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4832 { vect_recog_divmod_pattern, "divmod" },
4833 { vect_recog_mult_pattern, "mult" },
4834 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4835 { vect_recog_bool_pattern, "bool" },
4836 /* This must come before mask conversion, and includes the parts
4837 of mask conversion that are needed for gather and scatter
4838 internal functions. */
4839 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4840 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4843 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4845 /* Mark statements that are involved in a pattern. */
4847 static inline void
4848 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4849 tree pattern_vectype)
4851 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4852 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4854 bool old_pattern_p = is_pattern_stmt_p (orig_stmt_info);
4855 if (old_pattern_p)
4857 /* We're replacing a statement in an existing pattern definition
4858 sequence. */
4859 if (dump_enabled_p ())
4861 dump_printf_loc (MSG_NOTE, vect_location,
4862 "replacing earlier pattern ");
4863 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_stmt, 0);
4866 /* To keep the book-keeping simple, just swap the lhs of the
4867 old and new statements, so that the old one has a valid but
4868 unused lhs. */
4869 tree old_lhs = gimple_get_lhs (orig_stmt);
4870 gimple_set_lhs (orig_stmt, gimple_get_lhs (pattern_stmt));
4871 gimple_set_lhs (pattern_stmt, old_lhs);
4873 if (dump_enabled_p ())
4875 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4876 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4879 /* Switch to the statement that ORIG replaces. */
4880 orig_stmt_info
4881 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (orig_stmt_info));
4883 /* We shouldn't be replacing the main pattern statement. */
4884 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) != orig_stmt);
4887 if (def_seq)
4888 for (gimple_stmt_iterator si = gsi_start (def_seq);
4889 !gsi_end_p (si); gsi_next (&si))
4890 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4892 if (old_pattern_p)
4894 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4896 /* Insert all the new pattern statements before the original one. */
4897 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4898 gimple_stmt_iterator gsi = gsi_for_stmt (orig_stmt, orig_def_seq);
4899 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4900 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4902 /* Remove the pattern statement that this new pattern replaces. */
4903 gsi_remove (&gsi, false);
4905 else
4906 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4909 /* Function vect_pattern_recog_1
4911 Input:
4912 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4913 computation pattern.
4914 STMT: A stmt from which the pattern search should start.
4916 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4917 a sequence of statements that has the same functionality and can be
4918 used to replace STMT. It returns the last statement in the sequence
4919 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4920 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4921 statement, having first checked that the target supports the new operation
4922 in that type.
4924 This function also does some bookkeeping, as explained in the documentation
4925 for vect_pattern_recog. */
4927 static void
4928 vect_pattern_recog_1 (vect_recog_func *recog_func,
4929 gimple_stmt_iterator si,
4930 vec<gimple *> *stmts_to_replace)
4932 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4933 stmt_vec_info stmt_info;
4934 loop_vec_info loop_vinfo;
4935 tree pattern_vectype;
4936 int i;
4938 /* If this statement has already been replaced with pattern statements,
4939 leave the original statement alone, since the first match wins.
4940 Instead try to match against the definition statements that feed
4941 the main pattern statement. */
4942 stmt_info = vinfo_for_stmt (stmt);
4943 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4945 gimple_stmt_iterator gsi;
4946 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4947 !gsi_end_p (gsi); gsi_next (&gsi))
4948 vect_pattern_recog_1 (recog_func, gsi, stmts_to_replace);
4949 return;
4952 stmts_to_replace->truncate (0);
4953 stmts_to_replace->quick_push (stmt);
4954 pattern_stmt = recog_func->fn (stmts_to_replace, &pattern_vectype);
4955 if (!pattern_stmt)
4957 /* Clear related stmt info that analysis might have noted for
4958 stmts to be replaced. */
4959 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4960 && (unsigned) i < stmts_to_replace->length ();
4961 i++)
4963 stmt_info = vinfo_for_stmt (stmt);
4964 if (!is_pattern_stmt_p (stmt_info))
4965 STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
4967 /* Clear any half-formed pattern definition sequence. */
4968 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4969 return;
4972 stmt = stmts_to_replace->last ();
4973 stmt_info = vinfo_for_stmt (stmt);
4974 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4975 gcc_assert (pattern_vectype);
4977 /* Found a vectorizable pattern. */
4978 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_NOTE, vect_location,
4981 "%s pattern recognized: ", recog_func->name);
4982 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4985 /* Mark the stmts that are involved in the pattern. */
4986 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4988 /* Patterns cannot be vectorized using SLP, because they change the order of
4989 computation. */
4990 if (loop_vinfo)
4992 unsigned ix, ix2;
4993 gimple **elem_ptr;
4994 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4995 elem_ptr, *elem_ptr == stmt);
4998 /* It is possible that additional pattern stmts are created and inserted in
4999 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
5000 relevant statements. */
5001 for (i = 0; stmts_to_replace->iterate (i, &stmt)
5002 && (unsigned) i < (stmts_to_replace->length () - 1);
5003 i++)
5005 stmt_info = vinfo_for_stmt (stmt);
5006 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5007 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE, vect_location,
5010 "additional pattern stmt: ");
5011 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
5014 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
5017 return;
5021 /* Function vect_pattern_recog
5023 Input:
5024 VINFO - a vec_info describing the loop or basic block in which we want
5025 to look for computation idioms.
5027 Output - for each computation idiom that is detected we create a new stmt
5028 that provides the same functionality and that can be vectorized. We
5029 also record some information in the struct_stmt_info of the relevant
5030 stmts, as explained below:
5032 At the entry to this function we have the following stmts, with the
5033 following initial value in the STMT_VINFO fields:
5035 stmt in_pattern_p related_stmt vec_stmt
5036 S1: a_i = .... - - -
5037 S2: a_2 = ..use(a_i).. - - -
5038 S3: a_1 = ..use(a_2).. - - -
5039 S4: a_0 = ..use(a_1).. - - -
5040 S5: ... = ..use(a_0).. - - -
5042 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5043 represented by a single stmt. We then:
5044 - create a new stmt S6 equivalent to the pattern (the stmt is not
5045 inserted into the code)
5046 - fill in the STMT_VINFO fields as follows:
5048 in_pattern_p related_stmt vec_stmt
5049 S1: a_i = .... - - -
5050 S2: a_2 = ..use(a_i).. - - -
5051 S3: a_1 = ..use(a_2).. - - -
5052 S4: a_0 = ..use(a_1).. true S6 -
5053 '---> S6: a_new = .... - S4 -
5054 S5: ... = ..use(a_0).. - - -
5056 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5057 to each other through the RELATED_STMT field).
5059 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5060 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5061 remain irrelevant unless used by stmts other than S4.
5063 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5064 (because they are marked as irrelevant). It will vectorize S6, and record
5065 a pointer to the new vector stmt VS6 from S6 (as usual).
5066 S4 will be skipped, and S5 will be vectorized as usual:
5068 in_pattern_p related_stmt vec_stmt
5069 S1: a_i = .... - - -
5070 S2: a_2 = ..use(a_i).. - - -
5071 S3: a_1 = ..use(a_2).. - - -
5072 > VS6: va_new = .... - - -
5073 S4: a_0 = ..use(a_1).. true S6 VS6
5074 '---> S6: a_new = .... - S4 VS6
5075 > VS5: ... = ..vuse(va_new).. - - -
5076 S5: ... = ..use(a_0).. - - -
5078 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5079 elsewhere), and we'll end up with:
5081 VS6: va_new = ....
5082 VS5: ... = ..vuse(va_new)..
5084 In case of more than one pattern statements, e.g., widen-mult with
5085 intermediate type:
5087 S1 a_t = ;
5088 S2 a_T = (TYPE) a_t;
5089 '--> S3: a_it = (interm_type) a_t;
5090 S4 prod_T = a_T * CONST;
5091 '--> S5: prod_T' = a_it w* CONST;
5093 there may be other users of a_T outside the pattern. In that case S2 will
5094 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5095 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5096 be recorded in S3. */
5098 void
5099 vect_pattern_recog (vec_info *vinfo)
5101 struct loop *loop;
5102 basic_block *bbs;
5103 unsigned int nbbs;
5104 gimple_stmt_iterator si;
5105 unsigned int i, j;
5106 auto_vec<gimple *, 1> stmts_to_replace;
5108 vect_determine_precisions (vinfo);
5110 DUMP_VECT_SCOPE ("vect_pattern_recog");
5112 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5114 loop = LOOP_VINFO_LOOP (loop_vinfo);
5115 bbs = LOOP_VINFO_BBS (loop_vinfo);
5116 nbbs = loop->num_nodes;
5118 /* Scan through the loop stmts, applying the pattern recognition
5119 functions starting at each stmt visited: */
5120 for (i = 0; i < nbbs; i++)
5122 basic_block bb = bbs[i];
5123 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5124 /* Scan over all generic vect_recog_xxx_pattern functions. */
5125 for (j = 0; j < NUM_PATTERNS; j++)
5126 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
5127 &stmts_to_replace);
5130 else
5132 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5133 for (si = bb_vinfo->region_begin;
5134 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5136 gimple *stmt = gsi_stmt (si);
5137 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5138 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5139 continue;
5141 /* Scan over all generic vect_recog_xxx_pattern functions. */
5142 for (j = 0; j < NUM_PATTERNS; j++)
5143 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
5144 &stmts_to_replace);