gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
56 value_range_type vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
71 return true;
73 else
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
80 return false;
84 /* Report that we've found an instance of pattern PATTERN in
85 statement STMT. */
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
93 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO.
98 Set its vector type to VECTYPE if it doesn't have one already. */
100 static void
101 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
102 tree vectype)
104 stmt_vec_info pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
105 if (pattern_stmt_info == NULL)
107 pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
108 orig_stmt_info->vinfo);
109 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
111 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
113 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info->stmt;
114 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
115 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
116 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
117 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
120 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
121 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
122 have one already. */
124 static void
125 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
126 tree vectype)
128 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
129 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
130 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
133 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
134 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
135 be different from the vector type of the final pattern statement. */
137 static inline void
138 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
139 tree vectype = NULL_TREE)
141 vec_info *vinfo = stmt_info->vinfo;
142 if (vectype)
144 gcc_assert (!vinfo_for_stmt (new_stmt));
145 stmt_vec_info new_stmt_info = new_stmt_vec_info (new_stmt, vinfo);
146 set_vinfo_for_stmt (new_stmt, new_stmt_info);
147 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
149 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
150 new_stmt);
153 static inline void
154 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
156 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
157 append_pattern_def_seq (stmt_info, stmt);
160 /* The caller wants to perform new operations on vect_external variable
161 VAR, so that the result of the operations would also be vect_external.
162 Return the edge on which the operations can be performed, if one exists.
163 Return null if the operations should instead be treated as part of
164 the pattern that needs them. */
166 static edge
167 vect_get_external_def_edge (vec_info *vinfo, tree var)
169 edge e = NULL;
170 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
172 e = loop_preheader_edge (loop_vinfo->loop);
173 if (!SSA_NAME_IS_DEFAULT_DEF (var))
175 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
176 if (bb == NULL
177 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
178 e = NULL;
181 return e;
184 /* Return true if the target supports a vector version of CODE,
185 where CODE is known to map to a direct optab. ITYPE specifies
186 the type of (some of) the scalar inputs and OTYPE specifies the
187 type of the scalar result.
189 If CODE allows the inputs and outputs to have different types
190 (such as for WIDEN_SUM_EXPR), it is the input mode rather
191 than the output mode that determines the appropriate target pattern.
192 Operand 0 of the target pattern then specifies the mode that the output
193 must have.
195 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
196 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
197 is nonnull. */
199 static bool
200 vect_supportable_direct_optab_p (tree otype, tree_code code,
201 tree itype, tree *vecotype_out,
202 tree *vecitype_out = NULL)
204 tree vecitype = get_vectype_for_scalar_type (itype);
205 if (!vecitype)
206 return false;
208 tree vecotype = get_vectype_for_scalar_type (otype);
209 if (!vecotype)
210 return false;
212 optab optab = optab_for_tree_code (code, vecitype, optab_default);
213 if (!optab)
214 return false;
216 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
217 if (icode == CODE_FOR_nothing
218 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
219 return false;
221 *vecotype_out = vecotype;
222 if (vecitype_out)
223 *vecitype_out = vecitype;
224 return true;
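/* For illustration, a typical call from one of the pattern recognizers
   later in this file (the dot-product and SAD recognizers use exactly
   this shape):

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
					   type_out, &half_vectype))
       return NULL;

   i.e. the scalar result type comes first, the narrower input type third,
   and the vector types are returned through the out parameters.  */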
227 /* Round bit precision PRECISION up to a full element. */
229 static unsigned int
230 vect_element_precision (unsigned int precision)
232 precision = 1 << ceil_log2 (precision);
233 return MAX (precision, BITS_PER_UNIT);
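/* For example, vect_element_precision (3) and vect_element_precision (7)
   both return 8, vect_element_precision (9) returns 16, and precisions of
   1 or 2 are rounded up to BITS_PER_UNIT (8 on typical targets).  */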
236 /* If OP is defined by a statement that's being considered for vectorization,
237 return information about that statement, otherwise return NULL. */
239 static stmt_vec_info
240 vect_get_internal_def (vec_info *vinfo, tree op)
242 vect_def_type dt;
243 gimple *def_stmt;
244 if (TREE_CODE (op) != SSA_NAME
245 || !vect_is_simple_use (op, vinfo, &dt, &def_stmt)
246 || dt != vect_internal_def)
247 return NULL;
249 return vinfo_for_stmt (def_stmt);
252 /* Check whether NAME, an ssa-name used in USE_STMT,
253 is a result of a type promotion, such that:
254 DEF_STMT: NAME = NOP (name0)
255 If CHECK_SIGN is TRUE, check that either both types are signed or both are
256 unsigned. */
258 static bool
259 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
260 tree *orig_type, gimple **def_stmt, bool *promotion)
262 stmt_vec_info stmt_vinfo;
263 tree type = TREE_TYPE (name);
264 tree oprnd0;
265 enum vect_def_type dt;
267 stmt_vinfo = vinfo_for_stmt (use_stmt);
268 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, def_stmt))
269 return false;
271 if (dt != vect_internal_def
272 && dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 if (!*def_stmt)
276 return false;
278 if (!is_gimple_assign (*def_stmt))
279 return false;
281 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
282 return false;
284 oprnd0 = gimple_assign_rhs1 (*def_stmt);
286 *orig_type = TREE_TYPE (oprnd0);
287 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
288 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
289 return false;
291 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
292 *promotion = true;
293 else
294 *promotion = false;
296 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
297 return false;
299 return true;
302 /* Holds information about an input operand after some sign changes
303 and type promotions have been peeled away. */
304 struct vect_unpromoted_value {
305 vect_unpromoted_value ();
307 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
309 /* The value obtained after peeling away zero or more casts. */
310 tree op;
312 /* The type of OP. */
313 tree type;
315 /* The definition type of OP. */
316 vect_def_type dt;
318 /* If OP is the result of peeling at least one cast, and if the cast
319 of OP itself is a vectorizable statement, CASTER identifies that
320 statement, otherwise it is null. */
321 stmt_vec_info caster;
324 inline vect_unpromoted_value::vect_unpromoted_value ()
325 : op (NULL_TREE),
326 type (NULL_TREE),
327 dt (vect_uninitialized_def),
328 caster (NULL)
332 /* Set the operand to OP_IN, its definition type to DT_IN, and the
333 statement that casts it to CASTER_IN. */
335 inline void
336 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
337 stmt_vec_info caster_in)
339 op = op_in;
340 type = TREE_TYPE (op);
341 dt = dt_in;
342 caster = caster_in;
345 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
346 to reach some vectorizable inner operand OP', continuing as long as it
347 is possible to convert OP' back to OP using a possible sign change
348 followed by a possible promotion P. Return this OP', or null if OP is
349 not a vectorizable SSA name. If there is a promotion P, describe its
350 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
351 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
352 have more than one user.
354 A successful return means that it is possible to go from OP' to OP
355 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
356 whereas the cast from UNPROM to OP might be a promotion, a sign
357 change, or a nop.
359 E.g. say we have:
361 signed short *ptr = ...;
362 signed short C = *ptr;
363 unsigned short B = (unsigned short) C; // sign change
364 signed int A = (signed int) B; // unsigned promotion
365 ...possible other uses of A...
366 unsigned int OP = (unsigned int) A; // sign change
368 In this case it's possible to go directly from C to OP using:
370 OP = (unsigned int) (unsigned short) C;
371 +------------+ +--------------+
372 promotion sign change
374 so OP' would be C. The input to the promotion is B, so UNPROM
375 would describe B. */
377 static tree
378 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
379 vect_unpromoted_value *unprom,
380 bool *single_use_p = NULL)
382 tree res = NULL_TREE;
383 tree op_type = TREE_TYPE (op);
384 unsigned int orig_precision = TYPE_PRECISION (op_type);
385 stmt_vec_info caster = NULL;
386 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
388 /* See whether OP is simple enough to vectorize. */
389 gimple *def_stmt;
390 vect_def_type dt;
391 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt))
392 break;
394 /* If OP is the input of a demotion, skip over it to see whether
395 OP is itself the result of a promotion. If so, the combined
396 effect of the promotion and the demotion might fit the required
397 pattern, otherwise neither operation fits.
399 This copes with cases such as the result of an arithmetic
400 operation being truncated before being stored, and where that
401 arithmetic operation has been recognized as an over-widened one. */
402 if (TYPE_PRECISION (op_type) <= orig_precision)
404 /* Use OP as the UNPROM described above if we haven't yet
405 found a promotion, or if using the new input preserves the
406 sign of the previous promotion. */
407 if (!res
408 || TYPE_PRECISION (unprom->type) == orig_precision
409 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
410 unprom->set_op (op, dt, caster);
411 /* Stop if we've already seen a promotion and if this
412 conversion does more than change the sign. */
413 else if (TYPE_PRECISION (op_type)
414 != TYPE_PRECISION (unprom->type))
415 break;
417 /* The sequence now extends to OP. */
418 res = op;
421 /* See whether OP is defined by a cast. Record it as CASTER if
422 the cast is potentially vectorizable. */
423 if (!def_stmt)
424 break;
425 if (dt == vect_internal_def)
427 caster = vinfo_for_stmt (def_stmt);
428 /* Ignore pattern statements, since we don't link uses for them. */
429 if (single_use_p
430 && !STMT_VINFO_RELATED_STMT (caster)
431 && !has_single_use (res))
432 *single_use_p = false;
434 else
435 caster = NULL;
436 gassign *assign = dyn_cast <gassign *> (def_stmt);
437 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
438 break;
440 /* Continue with the input to the cast. */
441 op = gimple_assign_rhs1 (def_stmt);
442 op_type = TREE_TYPE (op);
444 return res;
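/* A typical use of the above, taken from the widen-sum recognizer later
   in this file:

     vect_unpromoted_value unprom0;
     if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
	 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
       return NULL;

   i.e. the caller strips the conversions and then checks that the
   unpromoted input really is no more than half the width of the result.  */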
447 /* OP is an integer operand to an operation that returns TYPE, and we
448 want to treat the operation as a widening one. So far we can treat
449 it as widening from *COMMON_TYPE.
451 Return true if OP is suitable for such a widening operation,
452 either widening from *COMMON_TYPE or from some supertype of it.
453 Update *COMMON_TYPE to the supertype in the latter case.
455 SHIFT_P is true if OP is a shift amount. */
457 static bool
458 vect_joust_widened_integer (tree type, bool shift_p, tree op,
459 tree *common_type)
461 /* Calculate the minimum precision required by OP, without changing
462 the sign of either operand. */
463 unsigned int precision;
464 if (shift_p)
466 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
467 return false;
468 precision = TREE_INT_CST_LOW (op);
470 else
472 precision = wi::min_precision (wi::to_widest (op),
473 TYPE_SIGN (*common_type));
474 if (precision * 2 > TYPE_PRECISION (type))
475 return false;
478 /* If OP requires a wider type, switch to that type. The checks
479 above ensure that this is still narrower than the result. */
480 precision = vect_element_precision (precision);
481 if (TYPE_PRECISION (*common_type) < precision)
482 *common_type = build_nonstandard_integer_type
483 (precision, TYPE_UNSIGNED (*common_type));
484 return true;
487 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
488 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
490 static bool
491 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
493 if (types_compatible_p (*common_type, new_type))
494 return true;
496 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
497 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
498 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
499 return true;
501 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
502 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
503 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
505 *common_type = new_type;
506 return true;
509 /* We have mismatched signs, with the signed type being
510 no wider than the unsigned type. In this case we need
511 a wider signed type. */
512 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
513 TYPE_PRECISION (new_type));
514 precision *= 2;
515 if (precision * 2 > TYPE_PRECISION (type))
516 return false;
518 *common_type = build_nonstandard_integer_type (precision, false);
519 return true;
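/* As a concrete illustration (assuming 8-bit chars and a 32-bit result
   TYPE; the types are chosen only for the example): if *COMMON_TYPE is
   unsigned char and NEW_TYPE is signed char, neither type can hold all
   values of the other, so the mismatched-sign case above asks for a
   16-bit signed type.  Since 16 * 2 does not exceed the 32-bit TYPE,
   *COMMON_TYPE becomes a signed 16-bit type and the function succeeds.  */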
522 /* Check whether STMT_INFO can be viewed as a tree of integer operations
523 in which each node either performs CODE or WIDENED_CODE, and where
524 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
525 specifies the maximum number of leaf operands. SHIFT_P says whether
526 CODE and WIDENED_CODE are some sort of shift.
528 If STMT_INFO is such a tree, return the number of leaf operands
529 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
530 to a type that (a) is narrower than the result of STMT_INFO and
531 (b) can hold all leaf operand values.
533 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
534 exists. */
536 static unsigned int
537 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
538 tree_code widened_code, bool shift_p,
539 unsigned int max_nops,
540 vect_unpromoted_value *unprom, tree *common_type)
542 /* Check for an integer operation with the right code. */
543 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
544 if (!assign)
545 return 0;
547 tree_code rhs_code = gimple_assign_rhs_code (assign);
548 if (rhs_code != code && rhs_code != widened_code)
549 return 0;
551 tree type = gimple_expr_type (assign);
552 if (!INTEGRAL_TYPE_P (type))
553 return 0;
555 /* Assume that both operands will be leaf operands. */
556 max_nops -= 2;
558 /* Check the operands. */
559 unsigned int next_op = 0;
560 for (unsigned int i = 0; i < 2; ++i)
562 vect_unpromoted_value *this_unprom = &unprom[next_op];
563 unsigned int nops = 1;
564 tree op = gimple_op (assign, i + 1);
565 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
567 /* We already have a common type from earlier operands.
568 Update it to account for OP. */
569 this_unprom->set_op (op, vect_constant_def);
570 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
571 return 0;
573 else
575 /* Only allow shifts by constants. */
576 if (shift_p && i == 1)
577 return 0;
579 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
580 this_unprom))
581 return 0;
583 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
585 /* The operand isn't widened. If STMT_INFO has the code
586 for an unwidened operation, recursively check whether
587 this operand is a node of the tree. */
588 if (rhs_code != code
589 || max_nops == 0
590 || this_unprom->dt != vect_internal_def)
591 return 0;
593 /* Give back the leaf slot allocated above now that we're
594 not treating this as a leaf operand. */
595 max_nops += 1;
597 /* Recursively process the definition of the operand. */
598 stmt_vec_info def_stmt_info
599 = vinfo_for_stmt (SSA_NAME_DEF_STMT (this_unprom->op));
600 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
601 shift_p, max_nops, this_unprom,
602 common_type);
603 if (nops == 0)
604 return 0;
606 max_nops -= nops;
608 else
610 /* Make sure that the operand is narrower than the result. */
611 if (TYPE_PRECISION (this_unprom->type) * 2
612 > TYPE_PRECISION (type))
613 return 0;
615 /* Update COMMON_TYPE for the new operand. */
616 if (i == 0)
617 *common_type = this_unprom->type;
618 else if (!vect_joust_widened_type (type, this_unprom->type,
619 common_type))
620 return 0;
623 next_op += nops;
625 return next_op;
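/* For illustration, with CODE == PLUS_EXPR and MAX_NOPS == 3 (as in the
   average recognizer later in this file), a statement tree such as:

     int t1 = (int) a + (int) b;	// a and b assumed to be unsigned char
     int t2 = t1 + 1;

   is accepted with the three leaf operands a, b and 1 described in
   UNPROM[0..2] and with *COMMON_TYPE set to unsigned char.  */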
628 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
629 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
631 static tree
632 vect_recog_temp_ssa_var (tree type, gimple *stmt)
634 return make_temp_ssa_name (type, stmt, "patt");
637 /* STMT2_INFO describes a type conversion that could be split into STMT1
638 followed by a version of STMT2_INFO that takes NEW_RHS as its first
639 input. Try to do this using pattern statements, returning true on
640 success. */
642 static bool
643 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
644 gimple *stmt1, tree vectype)
646 if (is_pattern_stmt_p (stmt2_info))
648 /* STMT2_INFO is part of a pattern. Get the statement to which
649 the pattern is attached. */
650 stmt_vec_info orig_stmt2_info
651 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt2_info));
652 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location,
657 "Splitting pattern statement: ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
661 /* Since STMT2_INFO is a pattern statement, we can change it
662 in-situ without worrying about changing the code for the
663 containing block. */
664 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
666 if (dump_enabled_p ())
668 dump_printf_loc (MSG_NOTE, vect_location, "into: ");
669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
670 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
674 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
675 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info->stmt)
676 /* STMT2_INFO is the actual pattern statement. Add STMT1
677 to the end of the definition sequence. */
678 gimple_seq_add_stmt_without_update (def_seq, stmt1);
679 else
681 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
682 before it. */
683 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
684 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
686 return true;
688 else
690 /* STMT2_INFO doesn't yet have a pattern. Try to create a
691 two-statement pattern now. */
692 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
693 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
694 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
695 if (!lhs_vectype)
696 return false;
698 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE, vect_location,
701 "Splitting statement: ");
702 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
705 /* Add STMT1 as a singleton pattern definition sequence. */
706 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
707 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
708 gimple_seq_add_stmt_without_update (def_seq, stmt1);
710 /* Build the second of the two pattern statements. */
711 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
712 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
713 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
715 if (dump_enabled_p ())
717 dump_printf_loc (MSG_NOTE, vect_location,
718 "into pattern statements: ");
719 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
720 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
721 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0);
724 return true;
728 /* Convert UNPROM to TYPE and return the result, adding new statements
729 to STMT_INFO's pattern definition statements if no better way is
730 available. VECTYPE is the vector form of TYPE. */
732 static tree
733 vect_convert_input (stmt_vec_info stmt_info, tree type,
734 vect_unpromoted_value *unprom, tree vectype)
736 /* Check for a no-op conversion. */
737 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
738 return unprom->op;
740 /* Allow the caller to create constant vect_unpromoted_values. */
741 if (TREE_CODE (unprom->op) == INTEGER_CST)
742 return wide_int_to_tree (type, wi::to_widest (unprom->op));
744 /* See if we can reuse an existing result. */
745 if (unprom->caster)
747 tree lhs = gimple_get_lhs (unprom->caster->stmt);
748 if (types_compatible_p (TREE_TYPE (lhs), type))
749 return lhs;
752 /* We need a new conversion statement. */
753 tree new_op = vect_recog_temp_ssa_var (type, NULL);
754 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
756 /* If the operation is the input to a vectorizable cast, try splitting
757 that cast into two, taking the required result as a mid-way point. */
758 if (unprom->caster)
760 tree lhs = gimple_get_lhs (unprom->caster->stmt);
761 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
762 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
763 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
764 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
765 return new_op;
768 /* If OP is an external value, see if we can insert the new statement
769 on an incoming edge. */
770 if (unprom->dt == vect_external_def)
771 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
773 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
774 gcc_assert (!new_bb);
775 return new_op;
778 /* As a (common) last resort, add the statement to the pattern itself. */
779 append_pattern_def_seq (stmt_info, new_stmt, vectype);
780 return new_op;
783 /* Invoke vect_convert_input for N elements of UNPROM and store the
784 result in the corresponding elements of RESULT. */
786 static void
787 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
788 tree *result, tree type, vect_unpromoted_value *unprom,
789 tree vectype)
791 for (unsigned int i = 0; i < n; ++i)
793 unsigned int j;
794 for (j = 0; j < i; ++j)
795 if (unprom[j].op == unprom[i].op)
796 break;
797 if (j < i)
798 result[i] = result[j];
799 else
800 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
804 /* The caller has created a (possibly empty) sequence of pattern definition
805 statements followed by a single statement PATTERN_STMT. Cast the result
806 of this final statement to TYPE. If a new statement is needed, add
807 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
808 and return the new statement, otherwise return PATTERN_STMT as-is.
809 VECITYPE is the vector form of PATTERN_STMT's result type. */
811 static gimple *
812 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
813 tree vecitype)
815 tree lhs = gimple_get_lhs (pattern_stmt);
816 if (!types_compatible_p (type, TREE_TYPE (lhs)))
818 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
819 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
820 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
822 return pattern_stmt;
825 /* Return true if STMT_VINFO describes a reduction for which reassociation
826 is allowed. If STMT_INFO is part of a group, assume that it's part of
827 a reduction chain and optimistically assume that all statements
828 except the last allow reassociation. */
830 static bool
831 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
833 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
834 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
835 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
838 /* As above, but also require it to have code CODE and to be a reduction
839 in the outermost loop. When returning true, store the operands in
840 *OP0_OUT and *OP1_OUT. */
842 static bool
843 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
844 tree *op0_out, tree *op1_out)
846 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
847 if (!loop_info)
848 return false;
850 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
851 if (!assign || gimple_assign_rhs_code (assign) != code)
852 return false;
854 /* We don't allow changing the order of the computation in the inner-loop
855 when doing outer-loop vectorization. */
856 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
857 if (loop && nested_in_vect_loop_p (loop, assign))
858 return false;
860 if (!vect_reassociating_reduction_p (stmt_info))
861 return false;
863 *op0_out = gimple_assign_rhs1 (assign);
864 *op1_out = gimple_assign_rhs2 (assign);
865 return true;
868 /* Function vect_recog_dot_prod_pattern
870 Try to find the following pattern:
872 type x_t, y_t;
873 TYPE1 prod;
874 TYPE2 sum = init;
875 loop:
876 sum_0 = phi <init, sum_1>
877 S1 x_t = ...
878 S2 y_t = ...
879 S3 x_T = (TYPE1) x_t;
880 S4 y_T = (TYPE1) y_t;
881 S5 prod = x_T * y_T;
882 [S6 prod = (TYPE2) prod; #optional]
883 S7 sum_1 = prod + sum_0;
885 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
886 the same size as 'TYPE1' or bigger. This is a special case of a reduction
887 computation.
889 Input:
891 * STMT_VINFO: The stmt from which the pattern search begins. In the
892 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
893 will be detected.
895 Output:
897 * TYPE_OUT: The type of the output of this pattern.
899 * Return value: A new stmt that will be used to replace the sequence of
900 stmts that constitute the pattern. In this case it will be:
901 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
903 Note: The dot-prod idiom is a widening reduction pattern that is
904 vectorized without preserving all the intermediate results. It
905 produces only N/2 (widened) results (by summing up pairs of
906 intermediate results) rather than all N results. Therefore, we
907 cannot allow this pattern when we want to get all the results and in
908 the correct order (as is the case when this computation is in an
909 inner-loop nested in an outer-loop that is being vectorized). */
911 static gimple *
912 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
914 tree oprnd0, oprnd1;
915 gimple *last_stmt = stmt_vinfo->stmt;
916 vec_info *vinfo = stmt_vinfo->vinfo;
917 tree type, half_type;
918 gimple *pattern_stmt;
919 tree var;
921 /* Look for the following pattern
922 DX = (TYPE1) X;
923 DY = (TYPE1) Y;
924 DPROD = DX * DY;
925 DDPROD = (TYPE2) DPROD;
926 sum_1 = DDPROD + sum_0;
927 In which
928 - DX is double the size of X
929 - DY is double the size of Y
930 - DX, DY, DPROD all have the same type
931 - sum is the same size as DPROD or bigger
932 - sum has been recognized as a reduction variable.
934 This is equivalent to:
935 DPROD = X w* Y; #widen mult
936 sum_1 = DPROD w+ sum_0; #widen summation
938 DPROD = X w* Y; #widen mult
939 sum_1 = DPROD + sum_0; #summation  */
942 /* Starting from LAST_STMT, follow the defs of its uses in search
943 of the above pattern. */
945 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
946 &oprnd0, &oprnd1))
947 return NULL;
949 type = gimple_expr_type (last_stmt);
951 vect_unpromoted_value unprom_mult;
952 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
954 /* So far so good. Since last_stmt was detected as a (summation) reduction,
955 we know that oprnd1 is the reduction variable (defined by a loop-header
956 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
957 It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
958 if (!oprnd0)
959 return NULL;
961 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
962 if (!mult_vinfo)
963 return NULL;
965 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
966 inside the loop (in case we are analyzing an outer-loop). */
967 vect_unpromoted_value unprom0[2];
968 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
969 false, 2, unprom0, &half_type))
970 return NULL;
972 /* If there are two widening operations, make sure they agree on
973 the sign of the extension. */
974 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
975 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
976 return NULL;
978 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
980 tree half_vectype;
981 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
982 type_out, &half_vectype))
983 return NULL;
985 /* Get the inputs in the appropriate types. */
986 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
987 tree mult_oprnd[2];
988 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
989 unprom0, half_vectype);
991 var = vect_recog_temp_ssa_var (type, NULL);
992 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
993 mult_oprnd[0], mult_oprnd[1], oprnd1);
995 return pattern_stmt;
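/* For illustration, the classic scalar source of the dot-product idiom
   (with type = signed char, TYPE1 = short and TYPE2 = int; the concrete
   types are only one possible instantiation):

     signed char x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i] * y[i];

   Once the multiplication has been narrowed to the half-width type,
   S1-S7 above match and each vector iteration reduces to a single
   DOT_PROD_EXPR <x_t, y_t, sum_0>.  */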
999 /* Function vect_recog_sad_pattern
1001 Try to find the following Sum of Absolute Difference (SAD) pattern:
1003 type x_t, y_t;
1004 signed TYPE1 diff, abs_diff;
1005 TYPE2 sum = init;
1006 loop:
1007 sum_0 = phi <init, sum_1>
1008 S1 x_t = ...
1009 S2 y_t = ...
1010 S3 x_T = (TYPE1) x_t;
1011 S4 y_T = (TYPE1) y_t;
1012 S5 diff = x_T - y_T;
1013 S6 abs_diff = ABS_EXPR <diff>;
1014 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1015 S8 sum_1 = abs_diff + sum_0;
1017 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1018 the same size as 'TYPE1' or bigger. This is a special case of a reduction
1019 computation.
1021 Input:
1023 * STMT_VINFO: The stmt from which the pattern search begins. In the
1024 example, when this function is called with S8, the pattern
1025 {S3,S4,S5,S6,S7,S8} will be detected.
1027 Output:
1029 * TYPE_OUT: The type of the output of this pattern.
1031 * Return value: A new stmt that will be used to replace the sequence of
1032 stmts that constitute the pattern. In this case it will be:
1033 SAD_EXPR <x_t, y_t, sum_0>
1036 static gimple *
1037 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1039 gimple *last_stmt = stmt_vinfo->stmt;
1040 vec_info *vinfo = stmt_vinfo->vinfo;
1041 tree half_type;
1043 /* Look for the following pattern
1044 DX = (TYPE1) X;
1045 DY = (TYPE1) Y;
1046 DDIFF = DX - DY;
1047 DAD = ABS_EXPR <DDIFF>;
1048 DAD = (TYPE2) DAD;
1049 sum_1 = DAD + sum_0;
1050 In which
1051 - DX is at least double the size of X
1052 - DY is at least double the size of Y
1053 - DX, DY, DDIFF, DAD all have the same type
1054 - sum is the same size as DAD or bigger
1055 - sum has been recognized as a reduction variable.
1057 This is equivalent to:
1058 DDIFF = X w- Y; #widen sub
1059 DAD = ABS_EXPR <DDIFF>;
1060 sum_1 = DAD w+ sum_0; #widen summation
1062 DDIFF = X w- Y; #widen sub
1063 DAD = ABS_EXPR <DDIFF>;
1064 sum_1 = DAD + sum_0; #summation  */
1067 /* Starting from LAST_STMT, follow the defs of its uses in search
1068 of the above pattern. */
1070 tree plus_oprnd0, plus_oprnd1;
1071 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1072 &plus_oprnd0, &plus_oprnd1))
1073 return NULL;
1075 tree sum_type = gimple_expr_type (last_stmt);
1077 /* Any non-truncating sequence of conversions is OK here, since
1078 with a successful match, the result of the ABS(U) is known to fit
1079 within the nonnegative range of the result type. (It cannot be the
1080 negative of the minimum signed value due to the range of the widening
1081 MINUS_EXPR.) */
1082 vect_unpromoted_value unprom_abs;
1083 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1084 &unprom_abs);
1086 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1087 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1088 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1089 Then check that plus_oprnd0 is defined by an abs_expr. */
1091 if (!plus_oprnd0)
1092 return NULL;
1094 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1095 if (!abs_stmt_vinfo)
1096 return NULL;
1098 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1099 inside the loop (in case we are analyzing an outer-loop). */
1100 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1101 if (!abs_stmt
1102 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1103 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1104 return NULL;
1106 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1107 tree abs_type = TREE_TYPE (abs_oprnd);
1108 if (TYPE_UNSIGNED (abs_type))
1109 return NULL;
1111 /* Peel off conversions from the ABS input. This can involve sign
1112 changes (e.g. from an unsigned subtraction to a signed ABS input)
1113 or signed promotion, but it can't include unsigned promotion.
1114 (Note that ABS of an unsigned promotion should have been folded
1115 away before now anyway.) */
1116 vect_unpromoted_value unprom_diff;
1117 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1118 &unprom_diff);
1119 if (!abs_oprnd)
1120 return NULL;
1121 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1122 && TYPE_UNSIGNED (unprom_diff.type))
1123 return NULL;
1125 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1126 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1127 if (!diff_stmt_vinfo)
1128 return NULL;
1130 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1131 inside the loop (in case we are analyzing an outer-loop). */
1132 vect_unpromoted_value unprom[2];
1133 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1134 false, 2, unprom, &half_type))
1135 return NULL;
1137 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1139 tree half_vectype;
1140 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1141 type_out, &half_vectype))
1142 return NULL;
1144 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1145 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1146 tree sad_oprnd[2];
1147 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1148 unprom, half_vectype);
1150 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1151 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1152 sad_oprnd[1], plus_oprnd1);
1154 return pattern_stmt;
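/* For illustration, one scalar form that this recognizer targets (with
   type = unsigned char and TYPE1 = TYPE2 = int; types chosen only for
   the example):

     unsigned char x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += __builtin_abs (x[i] - y[i]);

   Targets with a sum-of-absolute-differences instruction (such as x86's
   psadbw) can then emit one SAD_EXPR per vector iteration.  */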
1157 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1158 so that it can be treated as though it had the form:
1160 A_TYPE a;
1161 B_TYPE b;
1162 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1163 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1164 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1165 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1166 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1168 Try to replace the pattern with:
1170 A_TYPE a;
1171 B_TYPE b;
1172 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1173 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1174 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1175 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1177 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1179 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1180 name of the pattern being matched, for dump purposes. */
1182 static gimple *
1183 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1184 tree_code orig_code, tree_code wide_code,
1185 bool shift_p, const char *name)
1187 gimple *last_stmt = last_stmt_info->stmt;
1189 vect_unpromoted_value unprom[2];
1190 tree half_type;
1191 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1192 shift_p, 2, unprom, &half_type))
1193 return NULL;
1195 /* Pattern detected. */
1196 vect_pattern_detected (name, last_stmt);
1198 tree type = gimple_expr_type (last_stmt);
1199 tree itype = type;
1200 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1201 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1202 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1203 TYPE_UNSIGNED (half_type));
1205 /* Check target support */
1206 tree vectype = get_vectype_for_scalar_type (half_type);
1207 tree vecitype = get_vectype_for_scalar_type (itype);
1208 enum tree_code dummy_code;
1209 int dummy_int;
1210 auto_vec<tree> dummy_vec;
1211 if (!vectype
1212 || !vecitype
1213 || !supportable_widening_operation (wide_code, last_stmt,
1214 vecitype, vectype,
1215 &dummy_code, &dummy_code,
1216 &dummy_int, &dummy_vec))
1217 return NULL;
1219 *type_out = get_vectype_for_scalar_type (type);
1220 if (!*type_out)
1221 return NULL;
1223 STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info) = NULL;
1224 tree oprnd[2];
1225 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1227 tree var = vect_recog_temp_ssa_var (itype, NULL);
1228 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1229 oprnd[0], oprnd[1]);
1231 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1234 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1235 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1237 static gimple *
1238 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1240 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1241 WIDEN_MULT_EXPR, false,
1242 "vect_recog_widen_mult_pattern");
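/* For illustration (one possible instantiation of the types described in
   vect_recog_widen_op_pattern above):

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   Each multiplication is done on inputs widened from 16 bits, so it
   becomes a WIDEN_MULT_EXPR on the narrow operands, producing the 32-bit
   results directly instead of extending both operands first.  */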
1245 /* Function vect_recog_pow_pattern
1247 Try to find the following pattern:
1249 x = POW (y, N);
1251 with POW being one of pow, powf, powi, powif and N being
1252 either 2 or 0.5.
1254 Input:
1256 * STMT_VINFO: The stmt from which the pattern search begins.
1258 Output:
1260 * TYPE_OUT: The type of the output of this pattern.
1262 * Return value: A new stmt that will be used to replace the sequence of
1263 stmts that constitute the pattern. In this case it will be:
1264 x = x * x
1266 x = sqrt (x)
1269 static gimple *
1270 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1272 gimple *last_stmt = stmt_vinfo->stmt;
1273 tree base, exp;
1274 gimple *stmt;
1275 tree var;
1277 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1278 return NULL;
1280 switch (gimple_call_combined_fn (last_stmt))
1282 CASE_CFN_POW:
1283 CASE_CFN_POWI:
1284 break;
1286 default:
1287 return NULL;
1290 base = gimple_call_arg (last_stmt, 0);
1291 exp = gimple_call_arg (last_stmt, 1);
1292 if (TREE_CODE (exp) != REAL_CST
1293 && TREE_CODE (exp) != INTEGER_CST)
1295 if (flag_unsafe_math_optimizations
1296 && TREE_CODE (base) == REAL_CST
1297 && !gimple_call_internal_p (last_stmt))
1299 combined_fn log_cfn;
1300 built_in_function exp_bfn;
1301 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1303 case BUILT_IN_POW:
1304 log_cfn = CFN_BUILT_IN_LOG;
1305 exp_bfn = BUILT_IN_EXP;
1306 break;
1307 case BUILT_IN_POWF:
1308 log_cfn = CFN_BUILT_IN_LOGF;
1309 exp_bfn = BUILT_IN_EXPF;
1310 break;
1311 case BUILT_IN_POWL:
1312 log_cfn = CFN_BUILT_IN_LOGL;
1313 exp_bfn = BUILT_IN_EXPL;
1314 break;
1315 default:
1316 return NULL;
1318 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1319 tree exp_decl = builtin_decl_implicit (exp_bfn);
1320 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1321 does that, but if C is a power of 2, we want to use
1322 exp2 (log2 (C) * x) in the non-vectorized version, but for
1323 vectorization we don't have vectorized exp2. */
1324 if (logc
1325 && TREE_CODE (logc) == REAL_CST
1326 && exp_decl
1327 && lookup_attribute ("omp declare simd",
1328 DECL_ATTRIBUTES (exp_decl)))
1330 cgraph_node *node = cgraph_node::get_create (exp_decl);
1331 if (node->simd_clones == NULL)
1333 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1334 || node->definition)
1335 return NULL;
1336 expand_simd_clones (node);
1337 if (node->simd_clones == NULL)
1338 return NULL;
1340 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1341 if (!*type_out)
1342 return NULL;
1343 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1344 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1345 new_pattern_def_seq (stmt_vinfo, g);
1346 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1347 g = gimple_build_call (exp_decl, 1, def);
1348 gimple_call_set_lhs (g, res);
1349 return g;
1353 return NULL;
1356 /* We now have a pow or powi builtin function call with a constant
1357 exponent. */
1359 /* Catch squaring. */
1360 if ((tree_fits_shwi_p (exp)
1361 && tree_to_shwi (exp) == 2)
1362 || (TREE_CODE (exp) == REAL_CST
1363 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1365 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1366 TREE_TYPE (base), type_out))
1367 return NULL;
1369 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1370 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1371 return stmt;
1374 /* Catch square root. */
1375 if (TREE_CODE (exp) == REAL_CST
1376 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1378 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1379 if (*type_out
1380 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1381 OPTIMIZE_FOR_SPEED))
1383 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1384 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1385 gimple_call_set_lhs (stmt, var);
1386 gimple_call_set_nothrow (stmt, true);
1387 return stmt;
1391 return NULL;
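/* For illustration of the two supported exponents:

     x = pow (y, 2.0);		is rewritten as y * y
     x = pow (y, 0.5);		is rewritten as an IFN_SQRT call, provided
				the target supports SQRT on the vector type.  */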
1395 /* Function vect_recog_widen_sum_pattern
1397 Try to find the following pattern:
1399 type x_t;
1400 TYPE x_T, sum = init;
1401 loop:
1402 sum_0 = phi <init, sum_1>
1403 S1 x_t = *p;
1404 S2 x_T = (TYPE) x_t;
1405 S3 sum_1 = x_T + sum_0;
1407 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1408 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1409 a special case of a reduction computation.
1411 Input:
1413 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1414 when this function is called with S3, the pattern {S2,S3} will be detected.
1416 Output:
1418 * TYPE_OUT: The type of the output of this pattern.
1420 * Return value: A new stmt that will be used to replace the sequence of
1421 stmts that constitute the pattern. In this case it will be:
1422 WIDEN_SUM <x_t, sum_0>
1424 Note: The widening-sum idiom is a widening reduction pattern that is
1425 vectorized without preserving all the intermediate results. It
1426 produces only N/2 (widened) results (by summing up pairs of
1427 intermediate results) rather than all N results. Therefore, we
1428 cannot allow this pattern when we want to get all the results and in
1429 the correct order (as is the case when this computation is in an
1430 inner-loop nested in an outer-loop that is being vectorized). */
1432 static gimple *
1433 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1435 gimple *last_stmt = stmt_vinfo->stmt;
1436 tree oprnd0, oprnd1;
1437 vec_info *vinfo = stmt_vinfo->vinfo;
1438 tree type;
1439 gimple *pattern_stmt;
1440 tree var;
1442 /* Look for the following pattern
1443 DX = (TYPE) X;
1444 sum_1 = DX + sum_0;
1445 In which DX is at least double the size of X, and sum_1 has been
1446 recognized as a reduction variable.  */
1449 /* Starting from LAST_STMT, follow the defs of its uses in search
1450 of the above pattern. */
1452 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1453 &oprnd0, &oprnd1))
1454 return NULL;
1456 type = gimple_expr_type (last_stmt);
1458 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1459 we know that oprnd1 is the reduction variable (defined by a loop-header
1460 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1461 It remains to check that oprnd0 is defined by a cast from type 'type' to type
1462 'TYPE'. */
1464 vect_unpromoted_value unprom0;
1465 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1466 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1467 return NULL;
1469 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1471 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1472 type_out))
1473 return NULL;
1475 var = vect_recog_temp_ssa_var (type, NULL);
1476 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1478 return pattern_stmt;
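/* For illustration (with type = unsigned short and TYPE = unsigned int;
   types chosen only for the example):

     unsigned short x[N];
     unsigned int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i];

   becomes WIDEN_SUM_EXPR <x_t, sum_0> when the target provides a
   widening-sum optab for the input vector mode.  */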
1481 /* Recognize cases in which an operation is performed in one type WTYPE
1482 but could be done more efficiently in a narrower type NTYPE. For example,
1483 if we have:
1485 ATYPE a; // narrower than NTYPE
1486 BTYPE b; // narrower than NTYPE
1487 WTYPE aw = (WTYPE) a;
1488 WTYPE bw = (WTYPE) b;
1489 WTYPE res = aw + bw; // only uses of aw and bw
1491 then it would be more efficient to do:
1493 NTYPE an = (NTYPE) a;
1494 NTYPE bn = (NTYPE) b;
1495 NTYPE resn = an + bn;
1496 WTYPE res = (WTYPE) resn;
1498 Other situations include things like:
1500 ATYPE a; // NTYPE or narrower
1501 WTYPE aw = (WTYPE) a;
1502 WTYPE res = aw + b;
1504 when only "(NTYPE) res" is significant. In that case it's more efficient
1505 to truncate "b" and do the operation on NTYPE instead:
1507 NTYPE an = (NTYPE) a;
1508 NTYPE bn = (NTYPE) b; // truncation
1509 NTYPE resn = an + bn;
1510 WTYPE res = (WTYPE) resn;
1512 All users of "res" should then use "resn" instead, making the final
1513 statement dead (not marked as relevant). The final statement is still
1514 needed to maintain the type correctness of the IR.
1516 vect_determine_precisions has already determined the minimum
1517 precision of the operation and the minimum precision required
1518 by users of the result. */
1520 static gimple *
1521 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1523 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1524 if (!last_stmt)
1525 return NULL;
1527 /* See whether we have found that this operation can be done on a
1528 narrower type without changing its semantics. */
1529 unsigned int new_precision = last_stmt_info->operation_precision;
1530 if (!new_precision)
1531 return NULL;
1533 vec_info *vinfo = last_stmt_info->vinfo;
1534 tree lhs = gimple_assign_lhs (last_stmt);
1535 tree type = TREE_TYPE (lhs);
1536 tree_code code = gimple_assign_rhs_code (last_stmt);
1538 /* Keep the first operand of a COND_EXPR as-is: only the other two
1539 operands are interesting. */
1540 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1542 /* Check the operands. */
1543 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1544 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1545 unprom.quick_grow (nops);
1546 unsigned int min_precision = 0;
1547 bool single_use_p = false;
1548 for (unsigned int i = 0; i < nops; ++i)
1550 tree op = gimple_op (last_stmt, first_op + i);
1551 if (TREE_CODE (op) == INTEGER_CST)
1552 unprom[i].set_op (op, vect_constant_def);
1553 else if (TREE_CODE (op) == SSA_NAME)
1555 bool op_single_use_p = true;
1556 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1557 &op_single_use_p))
1558 return NULL;
1559 /* If:
1561 (1) N bits of the result are needed;
1562 (2) all inputs are widened from M<N bits; and
1563 (3) one operand OP is a single-use SSA name
1565 we can shift the M->N widening from OP to the output
1566 without changing the number or type of extensions involved.
1567 This then reduces the number of copies of STMT_INFO.
1569 If instead of (3) more than one operand is a single-use SSA name,
1570 shifting the extension to the output is even more of a win.
1572 If instead:
1574 (1) N bits of the result are needed;
1575 (2) one operand OP2 is widened from M2<N bits;
1576 (3) another operand OP1 is widened from M1<M2 bits; and
1577 (4) both OP1 and OP2 are single-use
1579 the choice is between:
1581 (a) truncating OP2 to M1, doing the operation on M1,
1582 and then widening the result to N
1584 (b) widening OP1 to M2, doing the operation on M2, and then
1585 widening the result to N
1587 Both shift the M2->N widening of the inputs to the output.
1588 (a) additionally shifts the M1->M2 widening to the output;
1589 it requires fewer copies of STMT_INFO but requires an extra
1590 M2->M1 truncation.
1592 Which is better will depend on the complexity and cost of
1593 STMT_INFO, which is hard to predict at this stage. However,
1594 a clear tie-breaker in favor of (b) is the fact that the
1595 truncation in (a) increases the length of the operation chain.
1597 If instead of (4) only one of OP1 or OP2 is single-use,
1598 (b) is still a win over doing the operation in N bits:
1599 it still shifts the M2->N widening on the single-use operand
1600 to the output and reduces the number of STMT_INFO copies.
1602 If neither operand is single-use then operating on fewer than
1603 N bits might lead to more extensions overall. Whether it does
1604 or not depends on global information about the vectorization
1605 region, and whether that's a good trade-off would again
1606 depend on the complexity and cost of the statements involved,
1607 as well as things like register pressure that are not normally
1608 modelled at this stage. We therefore ignore these cases
1609 and just optimize the clear single-use wins above.
1611 Thus we take the maximum precision of the unpromoted operands
1612 and record whether any operand is single-use. */
1613 if (unprom[i].dt == vect_internal_def)
1615 min_precision = MAX (min_precision,
1616 TYPE_PRECISION (unprom[i].type));
1617 single_use_p |= op_single_use_p;
1622 /* Although the operation could be done in operation_precision, we have
1623 to balance that against introducing extra truncations or extensions.
1624 Calculate the minimum precision that can be handled efficiently.
1626 The loop above determined that the operation could be handled
1627 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1628 extension from the inputs to the output without introducing more
1629 instructions, and would reduce the number of instructions required
1630 for STMT_INFO itself.
1632 vect_determine_precisions has also determined that the result only
1633 needs min_output_precision bits. Truncating by a factor of N times
1634 requires a tree of N - 1 instructions, so if TYPE is N times wider
1635 than min_output_precision, doing the operation in TYPE and truncating
1636 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1637 In contrast:
1639 - truncating the input to a unary operation and doing the operation
1640 in the new type requires at most N - 1 + 1 = N instructions per
1641 output vector
1643 - doing the same for a binary operation requires at most
1644 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1646 Both unary and binary operations require fewer instructions than
1647 this if the operands were extended from a suitable truncated form.
1648 Thus there is usually nothing to lose by doing operations in
1649 min_output_precision bits, but there can be something to gain. */
1650 if (!single_use_p)
1651 min_precision = last_stmt_info->min_output_precision;
1652 else
1653 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1655 /* Apply the minimum efficient precision we just calculated. */
1656 if (new_precision < min_precision)
1657 new_precision = min_precision;
1658 if (new_precision >= TYPE_PRECISION (type))
1659 return NULL;
1661 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1663 *type_out = get_vectype_for_scalar_type (type);
1664 if (!*type_out)
1665 return NULL;
1667 /* We've found a viable pattern. Get the new type of the operation. */
1668 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1669 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1671 /* We specifically don't check here whether the target supports the
1672 new operation, since it might be something that a later pattern
1673 wants to rewrite anyway. If targets have a minimum element size
1674 for some optabs, we should pattern-match smaller ops to larger ops
1675 where beneficial. */
1676 tree new_vectype = get_vectype_for_scalar_type (new_type);
1677 if (!new_vectype)
1678 return NULL;
1680 if (dump_enabled_p ())
1682 dump_printf_loc (MSG_NOTE, vect_location, "demoting ");
1683 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
1684 dump_printf (MSG_NOTE, " to ");
1685 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_type);
1686 dump_printf (MSG_NOTE, "\n");
1689 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1690 STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info) = NULL;
1691 tree ops[3] = {};
1692 for (unsigned int i = 1; i < first_op; ++i)
1693 ops[i - 1] = gimple_op (last_stmt, i);
1694 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1695 new_type, &unprom[0], new_vectype);
1697 /* Use the operation to produce a result of type NEW_TYPE. */
1698 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1699 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1700 ops[0], ops[1], ops[2]);
1701 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1703 if (dump_enabled_p ())
1705 dump_printf_loc (MSG_NOTE, vect_location,
1706 "created pattern stmt: ");
1707 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1710 pattern_stmt = vect_convert_output (last_stmt_info, type,
1711 pattern_stmt, new_vectype);
1713 return pattern_stmt;
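/* For illustration (with ATYPE = BTYPE = unsigned char and WTYPE = int,
   the int arising from the usual C integer promotions):

     unsigned char a[N], b[N], c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] + b[i];

   Only the low 8 bits of each int sum are ever used, so the addition can
   be done directly on unsigned char elements, quadrupling the number of
   elements processed per vector on a target with 32-bit int.  */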
1716 /* Recognize the patterns:
1718 ATYPE a; // narrower than TYPE
1719 BTYPE b; // narrower than TYPE
1720 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1721 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1723 where only the bottom half of avg is used. Try to transform them into:
1725 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1726 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1728 followed by:
1730 TYPE avg = (TYPE) avg';
1732 where NTYPE is no wider than half of TYPE. Since only the bottom half
1733 of avg is used, all or part of the cast of avg' should become redundant. */
1735 static gimple *
1736 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1738 /* Check for a shift right by one bit. */
1739 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1740 vec_info *vinfo = last_stmt_info->vinfo;
1741 if (!last_stmt
1742 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1743 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1744 return NULL;
1746 /* Check that the shift result is wider than the users of the
1747 result need (i.e. that narrowing would be a natural choice). */
1748 tree lhs = gimple_assign_lhs (last_stmt);
1749 tree type = TREE_TYPE (lhs);
1750 unsigned int target_precision
1751 = vect_element_precision (last_stmt_info->min_output_precision);
1752 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1753 return NULL;
1755 /* Get the definition of the shift input. */
1756 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1757 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1758 if (!plus_stmt_info)
1759 return NULL;
1761 /* Check whether the shift input can be seen as a tree of additions on
1762 2 or 3 widened inputs.
1764 Note that the pattern should be a win even if the result of one or
1765 more additions is reused elsewhere: if the pattern matches, we'd be
1766 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1767 internal_fn ifn = IFN_AVG_FLOOR;
1768 vect_unpromoted_value unprom[3];
1769 tree new_type;
1770 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1771 PLUS_EXPR, false, 3,
1772 unprom, &new_type);
1773 if (nops == 0)
1774 return NULL;
1775 if (nops == 3)
1777 /* Check that one operand is 1. */
1778 unsigned int i;
1779 for (i = 0; i < 3; ++i)
1780 if (integer_onep (unprom[i].op))
1781 break;
1782 if (i == 3)
1783 return NULL;
1784 /* Throw away the 1 operand and keep the other two. */
1785 if (i < 2)
1786 unprom[i] = unprom[2];
1787 ifn = IFN_AVG_CEIL;
1790 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1792 /* We know that:
1794 (a) the operation can be viewed as:
1796 TYPE widened0 = (TYPE) UNPROM[0];
1797 TYPE widened1 = (TYPE) UNPROM[1];
1798 TYPE tmp1 = widened0 + widened1 {+ 1};
1799 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1801 (b) the first two statements are equivalent to:
1803 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1804 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1806 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1807 where sensible;
1809 (d) all the operations can be performed correctly at twice the width of
1810 NEW_TYPE, due to the nature of the average operation; and
1812 (e) users of the result of the right shift need only TARGET_PRECISION
1813 bits, where TARGET_PRECISION is no more than half of TYPE's
1814 precision.
1816 Under these circumstances, the only situation in which NEW_TYPE
1817 could be narrower than TARGET_PRECISION is if widened0, widened1
1818 and an addition result are all used more than once. Thus we can
1819 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1820 as "free", whereas widening the result of the average instruction
1821 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1822 therefore better not to go narrower than TARGET_PRECISION. */
1823 if (TYPE_PRECISION (new_type) < target_precision)
1824 new_type = build_nonstandard_integer_type (target_precision,
1825 TYPE_UNSIGNED (new_type));
1827 /* Check for target support. */
1828 tree new_vectype = get_vectype_for_scalar_type (new_type);
1829 if (!new_vectype
1830 || !direct_internal_fn_supported_p (ifn, new_vectype,
1831 OPTIMIZE_FOR_SPEED))
1832 return NULL;
1834 /* The IR requires a valid vector type for the cast result, even though
1835 it's likely to be discarded. */
1836 *type_out = get_vectype_for_scalar_type (type);
1837 if (!*type_out)
1838 return NULL;
1840 /* Generate the IFN_AVG* call. */
1841 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1842 tree new_ops[2];
1843 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1844 unprom, new_vectype);
1845 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1846 new_ops[1]);
1847 gimple_call_set_lhs (average_stmt, new_var);
1848 gimple_set_location (average_stmt, gimple_location (last_stmt));
1850 if (dump_enabled_p ())
1852 dump_printf_loc (MSG_NOTE, vect_location,
1853 "created pattern stmt: ");
1854 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
1857 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
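/* Illustrative sketch (hypothetical code, not part of this file): a rounding
   average of the shape described in pattern (2) above.  Only the low 8 bits
   of the shift result are stored, so the int addition can become
   IFN_AVG_CEIL on unsigned char elements.  */

static void
example_avg_ceil (unsigned char *dst, const unsigned char *a,
                  const unsigned char *b, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = ((int) a[i] + (int) b[i] + 1) >> 1;
}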
1860 /* Recognize cases in which the input to a cast is wider than its
1861 output, and the input is fed by a widening operation. Fold this
1862 by removing the unnecessary intermediate widening. E.g.:
1864 unsigned char a;
1865 unsigned int b = (unsigned int) a;
1866 unsigned short c = (unsigned short) b;
1870 unsigned short c = (unsigned short) a;
1872 Although this is rare in input IR, it is an expected side-effect
1873 of the over-widening pattern above.
1875 This is beneficial also for integer-to-float conversions, if the
1876 widened integer has more bits than the float, and if the unwidened
1877 input doesn't. */
1879 static gimple *
1880 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1882 /* Check for a cast, including an integer-to-float conversion. */
1883 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1884 if (!last_stmt)
1885 return NULL;
1886 tree_code code = gimple_assign_rhs_code (last_stmt);
1887 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1888 return NULL;
1890 /* Make sure that the lhs is a scalar with a natural bitsize. */
1891 tree lhs = gimple_assign_lhs (last_stmt);
1892 if (!lhs)
1893 return NULL;
1894 tree lhs_type = TREE_TYPE (lhs);
1895 scalar_mode lhs_mode;
1896 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1897 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1898 return NULL;
1900 /* Check for a narrowing operation (from a vector point of view). */
1901 tree rhs = gimple_assign_rhs1 (last_stmt);
1902 tree rhs_type = TREE_TYPE (rhs);
1903 if (!INTEGRAL_TYPE_P (rhs_type)
1904 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1905 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1906 return NULL;
1908 /* Try to find an unpromoted input. */
1909 vec_info *vinfo = last_stmt_info->vinfo;
1910 vect_unpromoted_value unprom;
1911 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1912 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1913 return NULL;
1915 /* If the bits above RHS_TYPE matter, make sure that they're the
1916 same when extending from UNPROM as they are when extending from RHS. */
1917 if (!INTEGRAL_TYPE_P (lhs_type)
1918 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1919 return NULL;
1921 /* We can get the same result by casting UNPROM directly, to avoid
1922 the unnecessary widening and narrowing. */
1923 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1925 *type_out = get_vectype_for_scalar_type (lhs_type);
1926 if (!*type_out)
1927 return NULL;
1929 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1930 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1931 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1933 return pattern_stmt;
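/* Illustrative sketch (hypothetical code, not part of this file): the
   integer-to-float case mentioned above.  The 64-bit intermediate has more
   bits than float while the 16-bit input does not, so converting the input
   directly gives the same values and avoids the wide conversion.  */

static void
example_cast_forwprop (float *dst, const unsigned short *src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = (float) (unsigned long long) src[i];  /* same as (float) src[i] */
}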
1936 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1937 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1939 static gimple *
1940 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1942 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1943 WIDEN_LSHIFT_EXPR, true,
1944 "vect_recog_widen_shift_pattern");
1947 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1949 type a_t, b_t, c_t;
1951 S0 a_t = b_t r<< c_t;
1953 Input/Output:
1955 * STMT_VINFO: The stmt from which the pattern search begins,
1956 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1957 with a sequence:
1959 S1 d_t = -c_t;
1960 S2 e_t = d_t & (B - 1);
1961 S3 f_t = b_t << c_t;
1962 S4 g_t = b_t >> e_t;
1963 S0 a_t = f_t | g_t;
1965 where B is the element bitsize of type.
1967 Output:
1969 * TYPE_OUT: The type of the output of this pattern.
1971 * Return value: A new stmt that will be used to replace the rotate
1972 S0 stmt. */
1974 static gimple *
1975 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1977 gimple *last_stmt = stmt_vinfo->stmt;
1978 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1979 gimple *pattern_stmt, *def_stmt;
1980 enum tree_code rhs_code;
1981 vec_info *vinfo = stmt_vinfo->vinfo;
1982 enum vect_def_type dt;
1983 optab optab1, optab2;
1984 edge ext_def = NULL;
1986 if (!is_gimple_assign (last_stmt))
1987 return NULL;
1989 rhs_code = gimple_assign_rhs_code (last_stmt);
1990 switch (rhs_code)
1992 case LROTATE_EXPR:
1993 case RROTATE_EXPR:
1994 break;
1995 default:
1996 return NULL;
1999 lhs = gimple_assign_lhs (last_stmt);
2000 oprnd0 = gimple_assign_rhs1 (last_stmt);
2001 type = TREE_TYPE (oprnd0);
2002 oprnd1 = gimple_assign_rhs2 (last_stmt);
2003 if (TREE_CODE (oprnd0) != SSA_NAME
2004 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2005 || !INTEGRAL_TYPE_P (type)
2006 || !TYPE_UNSIGNED (type))
2007 return NULL;
2009 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt))
2010 return NULL;
2012 if (dt != vect_internal_def
2013 && dt != vect_constant_def
2014 && dt != vect_external_def)
2015 return NULL;
2017 vectype = get_vectype_for_scalar_type (type);
2018 if (vectype == NULL_TREE)
2019 return NULL;
2021 /* If vector/vector or vector/scalar rotate is supported by the target,
2022 don't do anything here. */
2023 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2024 if (optab1
2025 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2026 return NULL;
2028 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2030 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2031 if (optab2
2032 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2033 return NULL;
2036 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2037 don't do anything here either. */
2038 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2039 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2040 if (!optab1
2041 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2042 || !optab2
2043 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2045 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2046 return NULL;
2047 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2048 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2049 if (!optab1
2050 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2051 || !optab2
2052 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2053 return NULL;
2056 *type_out = vectype;
2058 if (dt == vect_external_def
2059 && TREE_CODE (oprnd1) == SSA_NAME)
2060 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2062 def = NULL_TREE;
2063 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2064 if (TREE_CODE (oprnd1) == INTEGER_CST
2065 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2066 def = oprnd1;
2067 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2069 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2070 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2071 && TYPE_PRECISION (TREE_TYPE (rhs1))
2072 == TYPE_PRECISION (type))
2073 def = rhs1;
2076 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2077 if (def == NULL_TREE)
2079 def = vect_recog_temp_ssa_var (type, NULL);
2080 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2081 if (ext_def)
2083 basic_block new_bb
2084 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2085 gcc_assert (!new_bb);
2087 else
2088 append_pattern_def_seq (stmt_vinfo, def_stmt);
2090 stype = TREE_TYPE (def);
2091 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2093 if (TREE_CODE (def) == INTEGER_CST)
2095 if (!tree_fits_uhwi_p (def)
2096 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2097 || integer_zerop (def))
2098 return NULL;
2099 def2 = build_int_cst (stype,
2100 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2102 else
2104 tree vecstype = get_vectype_for_scalar_type (stype);
2105 stmt_vec_info def_stmt_vinfo;
2107 if (vecstype == NULL_TREE)
2108 return NULL;
2109 def2 = vect_recog_temp_ssa_var (stype, NULL);
2110 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2111 if (ext_def)
2113 basic_block new_bb
2114 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2115 gcc_assert (!new_bb);
2117 else
2119 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2120 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2121 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2122 append_pattern_def_seq (stmt_vinfo, def_stmt);
2125 def2 = vect_recog_temp_ssa_var (stype, NULL);
2126 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2127 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2128 gimple_assign_lhs (def_stmt), mask);
2129 if (ext_def)
2131 basic_block new_bb
2132 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2133 gcc_assert (!new_bb);
2135 else
2137 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2138 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2139 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2140 append_pattern_def_seq (stmt_vinfo, def_stmt);
2144 var1 = vect_recog_temp_ssa_var (type, NULL);
2145 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2146 ? LSHIFT_EXPR : RSHIFT_EXPR,
2147 oprnd0, def);
2148 append_pattern_def_seq (stmt_vinfo, def_stmt);
2150 var2 = vect_recog_temp_ssa_var (type, NULL);
2151 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2152 ? RSHIFT_EXPR : LSHIFT_EXPR,
2153 oprnd0, def2);
2154 append_pattern_def_seq (stmt_vinfo, def_stmt);
2156 /* Pattern detected. */
2157 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2159 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2160 var = vect_recog_temp_ssa_var (type, NULL);
2161 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2163 return pattern_stmt;
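/* Illustrative sketch (not part of this file): the scalar equivalent of the
   S1-S4/S0 sequence built above, for a 32-bit left rotate by a variable
   amount that is already known to be in range.  */

static unsigned int
example_rotate_left (unsigned int b, unsigned int c)
{
  unsigned int d = -c;		/* S1 */
  unsigned int e = d & 31;	/* S2: B - 1 with B == 32 */
  unsigned int f = b << c;	/* S3 */
  unsigned int g = b >> e;	/* S4 */
  return f | g;			/* S0 */
}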
2166 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2167 vectorized:
2169 type a_t;
2170 TYPE b_T, res_T;
2172 S1 a_t = ;
2173 S2 b_T = ;
2174 S3 res_T = b_T op a_t;
2176 where type 'TYPE' is a type with a different size from 'type',
2177 and op is <<, >> or rotate.
2179 Also detect cases:
2181 type a_t;
2182 TYPE b_T, c_T, res_T;
2184 S0 c_T = ;
2185 S1 a_t = (type) c_T;
2186 S2 b_T = ;
2187 S3 res_T = b_T op a_t;
2189 Input/Output:
2191 * STMT_VINFO: The stmt from which the pattern search begins,
2192 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2193 with a shift/rotate that has the same type on both operands; in the
2194 second case just b_T op c_T, in the first case with an added cast
2195 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2197 Output:
2199 * TYPE_OUT: The type of the output of this pattern.
2201 * Return value: A new stmt that will be used to replace the shift/rotate
2202 S3 stmt. */
2204 static gimple *
2205 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2206 tree *type_out)
2208 gimple *last_stmt = stmt_vinfo->stmt;
2209 tree oprnd0, oprnd1, lhs, var;
2210 gimple *pattern_stmt;
2211 enum tree_code rhs_code;
2212 vec_info *vinfo = stmt_vinfo->vinfo;
2214 if (!is_gimple_assign (last_stmt))
2215 return NULL;
2217 rhs_code = gimple_assign_rhs_code (last_stmt);
2218 switch (rhs_code)
2220 case LSHIFT_EXPR:
2221 case RSHIFT_EXPR:
2222 case LROTATE_EXPR:
2223 case RROTATE_EXPR:
2224 break;
2225 default:
2226 return NULL;
2229 lhs = gimple_assign_lhs (last_stmt);
2230 oprnd0 = gimple_assign_rhs1 (last_stmt);
2231 oprnd1 = gimple_assign_rhs2 (last_stmt);
2232 if (TREE_CODE (oprnd0) != SSA_NAME
2233 || TREE_CODE (oprnd1) != SSA_NAME
2234 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2235 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2236 || TYPE_PRECISION (TREE_TYPE (lhs))
2237 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2238 return NULL;
2240 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2241 if (!def_vinfo)
2242 return NULL;
2244 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2245 if (*type_out == NULL_TREE)
2246 return NULL;
2248 tree def = NULL_TREE;
2249 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2250 if (def_stmt && gimple_assign_cast_p (def_stmt))
2252 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2253 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2254 && TYPE_PRECISION (TREE_TYPE (rhs1))
2255 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2257 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2258 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2259 def = rhs1;
2260 else
2262 tree mask
2263 = build_low_bits_mask (TREE_TYPE (rhs1),
2264 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2265 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2266 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2267 stmt_vec_info new_stmt_info
2268 = new_stmt_vec_info (def_stmt, vinfo);
2269 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2270 STMT_VINFO_VECTYPE (new_stmt_info)
2271 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2272 new_pattern_def_seq (stmt_vinfo, def_stmt);
2277 if (def == NULL_TREE)
2279 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2280 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2281 new_pattern_def_seq (stmt_vinfo, def_stmt);
2284 /* Pattern detected. */
2285 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2287 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2288 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2289 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2291 return pattern_stmt;
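/* Illustrative sketch (hypothetical code, not part of this file): a shift
   whose amount has a different size than the shifted value, as in the
   S1-S3 description above.  The pattern widens the shift amount so that
   both vector operands have the same element size.  Shift amounts are
   assumed to be in range.  */

static void
example_vector_vector_shift (long long *val, const int *amount, int n)
{
  for (int i = 0; i < n; i++)
    val[i] = val[i] >> amount[i];
}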
2294 /* Return true iff the target has a vector optab implementing the operation
2295 CODE on type VECTYPE. */
2297 static bool
2298 target_has_vecop_for_code (tree_code code, tree vectype)
2300 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2301 return voptab
2302 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2305 /* Verify that the target has optabs of VECTYPE to perform all the steps
2306 needed by the multiplication-by-immediate synthesis algorithm described by
2307 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2308 present. Return true iff the target supports all the steps. */
2310 static bool
2311 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2312 tree vectype, bool synth_shift_p)
2314 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2315 return false;
2317 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2318 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2320 if (var == negate_variant
2321 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2322 return false;
2324 /* If we must synthesize shifts with additions make sure that vector
2325 addition is available. */
2326 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2327 return false;
2329 for (int i = 1; i < alg->ops; i++)
2331 switch (alg->op[i])
2333 case alg_shift:
2334 break;
2335 case alg_add_t_m2:
2336 case alg_add_t2_m:
2337 case alg_add_factor:
2338 if (!supports_vplus)
2339 return false;
2340 break;
2341 case alg_sub_t_m2:
2342 case alg_sub_t2_m:
2343 case alg_sub_factor:
2344 if (!supports_vminus)
2345 return false;
2346 break;
2347 case alg_unknown:
2348 case alg_m:
2349 case alg_zero:
2350 case alg_impossible:
2351 return false;
2352 default:
2353 gcc_unreachable ();
2357 return true;
2360 /* Synthesize a left shift of OP by AMNT bits using a series of additions,
2361 putting the final result in DEST. Append all statements but the last into
2362 VINFO. Return the last statement. */
2364 static gimple *
2365 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2366 stmt_vec_info vinfo)
2368 HOST_WIDE_INT i;
2369 tree itype = TREE_TYPE (op);
2370 tree prev_res = op;
2371 gcc_assert (amnt >= 0);
2372 for (i = 0; i < amnt; i++)
2374 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2375 : dest;
2376 gimple *stmt
2377 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2378 prev_res = tmp_var;
2379 if (i < amnt - 1)
2380 append_pattern_def_seq (vinfo, stmt);
2381 else
2382 return stmt;
2384 gcc_unreachable ();
2385 return NULL;
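/* For illustration: calling the function above with AMNT == 3 emits the
   equivalent of

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   i.e. op << 3 synthesized as three doublings; the first two statements are
   appended to the pattern sequence and the last one is returned.  */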
2388 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2389 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2390 the process if necessary. Append the resulting assignment statements
2391 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2392 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2393 left shifts using additions. */
2395 static tree
2396 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2397 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2399 if (integer_zerop (op2)
2400 && (code == LSHIFT_EXPR
2401 || code == PLUS_EXPR))
2403 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2404 return op1;
2407 gimple *stmt;
2408 tree itype = TREE_TYPE (op1);
2409 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2411 if (code == LSHIFT_EXPR
2412 && synth_shift_p)
2414 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2415 stmt_vinfo);
2416 append_pattern_def_seq (stmt_vinfo, stmt);
2417 return tmp_var;
2420 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2421 append_pattern_def_seq (stmt_vinfo, stmt);
2422 return tmp_var;
2425 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2426 and simple arithmetic operations that can be vectorized. Record the statements
2427 produced in STMT_VINFO and return the last statement in the sequence or
2428 NULL if it's not possible to synthesize such a multiplication.
2429 This function mirrors the behavior of expand_mult_const in expmed.c but
2430 works on tree-ssa form. */
2432 static gimple *
2433 vect_synth_mult_by_constant (tree op, tree val,
2434 stmt_vec_info stmt_vinfo)
2436 tree itype = TREE_TYPE (op);
2437 machine_mode mode = TYPE_MODE (itype);
2438 struct algorithm alg;
2439 mult_variant variant;
2440 if (!tree_fits_shwi_p (val))
2441 return NULL;
2443 /* Multiplication synthesis by shifts, adds and subs can introduce
2444 signed overflow where the original operation didn't. Perform the
2445 operations on an unsigned type and cast back to avoid this.
2446 In the future we may want to relax this for synthesis algorithms
2447 that we can prove do not cause unexpected overflow. */
2448 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2450 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2452 /* Targets that don't support vector shifts but support vector additions
2453 can synthesize shifts that way. */
2454 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2456 HOST_WIDE_INT hwval = tree_to_shwi (val);
2457 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2458 The vectorizer's benefit analysis will decide whether it's beneficial
2459 to do this. */
2460 bool possible = choose_mult_variant (mode, hwval, &alg,
2461 &variant, MAX_COST);
2462 if (!possible)
2463 return NULL;
2465 tree vectype = get_vectype_for_scalar_type (multtype);
2467 if (!vectype
2468 || !target_supports_mult_synth_alg (&alg, variant,
2469 vectype, synth_shift_p))
2470 return NULL;
2472 tree accumulator;
2474 /* Clear out the sequence of statements so we can populate it below. */
2475 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2476 gimple *stmt = NULL;
2478 if (cast_to_unsigned_p)
2480 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2481 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2482 append_pattern_def_seq (stmt_vinfo, stmt);
2483 op = tmp_op;
2486 if (alg.op[0] == alg_zero)
2487 accumulator = build_int_cst (multtype, 0);
2488 else
2489 accumulator = op;
2491 bool needs_fixup = (variant == negate_variant)
2492 || (variant == add_variant);
2494 for (int i = 1; i < alg.ops; i++)
2496 tree shft_log = build_int_cst (multtype, alg.log[i]);
2497 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2498 tree tmp_var = NULL_TREE;
2500 switch (alg.op[i])
2502 case alg_shift:
2503 if (synth_shift_p)
2504 stmt
2505 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2506 stmt_vinfo);
2507 else
2508 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2509 shft_log);
2510 break;
2511 case alg_add_t_m2:
2512 tmp_var
2513 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2514 stmt_vinfo, synth_shift_p);
2515 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2516 tmp_var);
2517 break;
2518 case alg_sub_t_m2:
2519 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2520 shft_log, stmt_vinfo,
2521 synth_shift_p);
2522 /* In some algorithms the first step involves zeroing the
2523 accumulator. If subtracting from such an accumulator
2524 just emit the negation directly. */
2525 if (integer_zerop (accumulator))
2526 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2527 else
2528 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2529 tmp_var);
2530 break;
2531 case alg_add_t2_m:
2532 tmp_var
2533 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2534 stmt_vinfo, synth_shift_p);
2535 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2536 break;
2537 case alg_sub_t2_m:
2538 tmp_var
2539 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2540 stmt_vinfo, synth_shift_p);
2541 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2542 break;
2543 case alg_add_factor:
2544 tmp_var
2545 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2546 stmt_vinfo, synth_shift_p);
2547 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2548 tmp_var);
2549 break;
2550 case alg_sub_factor:
2551 tmp_var
2552 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2553 stmt_vinfo, synth_shift_p);
2554 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2555 accumulator);
2556 break;
2557 default:
2558 gcc_unreachable ();
2560 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2561 but rather return it directly. */
2563 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2564 append_pattern_def_seq (stmt_vinfo, stmt);
2565 accumulator = accum_tmp;
2567 if (variant == negate_variant)
2569 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2570 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2571 accumulator = accum_tmp;
2572 if (cast_to_unsigned_p)
2573 append_pattern_def_seq (stmt_vinfo, stmt);
2575 else if (variant == add_variant)
2577 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2578 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2579 accumulator = accum_tmp;
2580 if (cast_to_unsigned_p)
2581 append_pattern_def_seq (stmt_vinfo, stmt);
2583 /* Move back to a signed type if needed. */
2584 if (cast_to_unsigned_p)
2586 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2587 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2590 return stmt;
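/* For illustration: for VAL == 7 on a 32-bit type, choose_mult_variant
   typically picks a shift-and-subtract sequence, so the function above
   emits the equivalent of

     t1 = op << 3;
     res = t1 - op;

   with the shift itself synthesized by additions if SYNTH_SHIFT_P.  The
   exact sequence depends on the target's cost model.  */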
2593 /* Detect multiplication by a constant and convert it into a sequence of
2594 shifts, additions, subtractions and negations. We reuse the
2595 choose_mult_variant algorithms from expmed.c.
2597 Input/Output:
2599 STMT_VINFO: The stmt from which the pattern search begins,
2600 i.e. the mult stmt.
2602 Output:
2604 * TYPE_OUT: The type of the output of this pattern.
2606 * Return value: A new stmt that will be used to replace
2607 the multiplication. */
2609 static gimple *
2610 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2612 gimple *last_stmt = stmt_vinfo->stmt;
2613 tree oprnd0, oprnd1, vectype, itype;
2614 gimple *pattern_stmt;
2616 if (!is_gimple_assign (last_stmt))
2617 return NULL;
2619 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2620 return NULL;
2622 oprnd0 = gimple_assign_rhs1 (last_stmt);
2623 oprnd1 = gimple_assign_rhs2 (last_stmt);
2624 itype = TREE_TYPE (oprnd0);
2626 if (TREE_CODE (oprnd0) != SSA_NAME
2627 || TREE_CODE (oprnd1) != INTEGER_CST
2628 || !INTEGRAL_TYPE_P (itype)
2629 || !type_has_mode_precision_p (itype))
2630 return NULL;
2632 vectype = get_vectype_for_scalar_type (itype);
2633 if (vectype == NULL_TREE)
2634 return NULL;
2636 /* If the target can handle vectorized multiplication natively,
2637 don't attempt to optimize this. */
2638 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2639 if (mul_optab != unknown_optab)
2641 machine_mode vec_mode = TYPE_MODE (vectype);
2642 int icode = (int) optab_handler (mul_optab, vec_mode);
2643 if (icode != CODE_FOR_nothing)
2644 return NULL;
2647 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2648 if (!pattern_stmt)
2649 return NULL;
2651 /* Pattern detected. */
2652 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2654 *type_out = vectype;
2656 return pattern_stmt;
2659 /* Detect a signed division by a constant that wouldn't be
2660 otherwise vectorized:
2662 type a_t, b_t;
2664 S1 a_t = b_t / N;
2666 where type 'type' is an integral type and N is a constant.
2668 Similarly handle modulo by a constant:
2670 S4 a_t = b_t % N;
2672 Input/Output:
2674 * STMT_VINFO: The stmt from which the pattern search begins,
2675 i.e. the division stmt. If N is a power-of-two constant and the
2676 type is signed, S1 is replaced by:
2677 S3 y_t = b_t < 0 ? N - 1 : 0;
2678 S2 x_t = b_t + y_t;
2679 S1' a_t = x_t >> log2 (N);
2681 Under the same conditions (N a power-of-two constant, signed type),
2682 S4 is replaced by the following, where *_T temporaries are unsigned:
2683 S9 y_T = b_t < 0 ? -1U : 0U;
2684 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2685 S7 z_t = (type) z_T;
2686 S6 w_t = b_t + z_t;
2687 S5 x_t = w_t & (N - 1);
2688 S4' a_t = x_t - z_t;
2690 Output:
2692 * TYPE_OUT: The type of the output of this pattern.
2694 * Return value: A new stmt that will be used to replace the division
2695 S1 or modulo S4 stmt. */
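/* For illustration, with N == 4 and a signed b_t the S3/S2/S1' sequence
   above computes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   which rounds towards zero as TRUNC_DIV_EXPR requires, whereas an
   arithmetic shift alone would round towards minus infinity.  */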
2697 static gimple *
2698 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2700 gimple *last_stmt = stmt_vinfo->stmt;
2701 tree oprnd0, oprnd1, vectype, itype, cond;
2702 gimple *pattern_stmt, *def_stmt;
2703 enum tree_code rhs_code;
2704 vec_info *vinfo = stmt_vinfo->vinfo;
2705 optab optab;
2706 tree q;
2707 int dummy_int, prec;
2708 stmt_vec_info def_stmt_vinfo;
2710 if (!is_gimple_assign (last_stmt))
2711 return NULL;
2713 rhs_code = gimple_assign_rhs_code (last_stmt);
2714 switch (rhs_code)
2716 case TRUNC_DIV_EXPR:
2717 case TRUNC_MOD_EXPR:
2718 break;
2719 default:
2720 return NULL;
2723 oprnd0 = gimple_assign_rhs1 (last_stmt);
2724 oprnd1 = gimple_assign_rhs2 (last_stmt);
2725 itype = TREE_TYPE (oprnd0);
2726 if (TREE_CODE (oprnd0) != SSA_NAME
2727 || TREE_CODE (oprnd1) != INTEGER_CST
2728 || TREE_CODE (itype) != INTEGER_TYPE
2729 || !type_has_mode_precision_p (itype))
2730 return NULL;
2732 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2733 vectype = get_vectype_for_scalar_type (itype);
2734 if (vectype == NULL_TREE)
2735 return NULL;
2737 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2739 /* If the target can handle vectorized division or modulo natively,
2740 don't attempt to optimize this, since native division is likely
2741 to give smaller code. */
2742 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2743 if (optab != unknown_optab)
2745 machine_mode vec_mode = TYPE_MODE (vectype);
2746 int icode = (int) optab_handler (optab, vec_mode);
2747 if (icode != CODE_FOR_nothing)
2748 return NULL;
2752 prec = TYPE_PRECISION (itype);
2753 if (integer_pow2p (oprnd1))
2755 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2756 return NULL;
2758 /* Pattern detected. */
2759 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2761 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2762 build_int_cst (itype, 0));
2763 if (rhs_code == TRUNC_DIV_EXPR)
2765 tree var = vect_recog_temp_ssa_var (itype, NULL);
2766 tree shift;
2767 def_stmt
2768 = gimple_build_assign (var, COND_EXPR, cond,
2769 fold_build2 (MINUS_EXPR, itype, oprnd1,
2770 build_int_cst (itype, 1)),
2771 build_int_cst (itype, 0));
2772 new_pattern_def_seq (stmt_vinfo, def_stmt);
2773 var = vect_recog_temp_ssa_var (itype, NULL);
2774 def_stmt
2775 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2776 gimple_assign_lhs (def_stmt));
2777 append_pattern_def_seq (stmt_vinfo, def_stmt);
2779 shift = build_int_cst (itype, tree_log2 (oprnd1));
2780 pattern_stmt
2781 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2782 RSHIFT_EXPR, var, shift);
2784 else
2786 tree signmask;
2787 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2788 if (compare_tree_int (oprnd1, 2) == 0)
2790 signmask = vect_recog_temp_ssa_var (itype, NULL);
2791 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2792 build_int_cst (itype, 1),
2793 build_int_cst (itype, 0));
2794 append_pattern_def_seq (stmt_vinfo, def_stmt);
2796 else
2798 tree utype
2799 = build_nonstandard_integer_type (prec, 1);
2800 tree vecutype = get_vectype_for_scalar_type (utype);
2801 tree shift
2802 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2803 - tree_log2 (oprnd1));
2804 tree var = vect_recog_temp_ssa_var (utype, NULL);
2806 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2807 build_int_cst (utype, -1),
2808 build_int_cst (utype, 0));
2809 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2810 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2811 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2812 append_pattern_def_seq (stmt_vinfo, def_stmt);
2813 var = vect_recog_temp_ssa_var (utype, NULL);
2814 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2815 gimple_assign_lhs (def_stmt),
2816 shift);
2817 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2818 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2819 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2820 append_pattern_def_seq (stmt_vinfo, def_stmt);
2821 signmask = vect_recog_temp_ssa_var (itype, NULL);
2822 def_stmt
2823 = gimple_build_assign (signmask, NOP_EXPR, var);
2824 append_pattern_def_seq (stmt_vinfo, def_stmt);
2826 def_stmt
2827 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2828 PLUS_EXPR, oprnd0, signmask);
2829 append_pattern_def_seq (stmt_vinfo, def_stmt);
2830 def_stmt
2831 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2832 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2833 fold_build2 (MINUS_EXPR, itype, oprnd1,
2834 build_int_cst (itype, 1)));
2835 append_pattern_def_seq (stmt_vinfo, def_stmt);
2837 pattern_stmt
2838 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2839 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2840 signmask);
2843 *type_out = vectype;
2844 return pattern_stmt;
2847 if (prec > HOST_BITS_PER_WIDE_INT
2848 || integer_zerop (oprnd1))
2849 return NULL;
2851 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2852 return NULL;
2854 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2856 if (TYPE_UNSIGNED (itype))
2858 unsigned HOST_WIDE_INT mh, ml;
2859 int pre_shift, post_shift;
2860 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2861 & GET_MODE_MASK (itype_mode));
2862 tree t1, t2, t3, t4;
2864 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2865 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2866 return NULL;
2868 /* Find a suitable multiplier and right shift count
2869 to use instead of dividing by D. */
2870 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2872 /* If the suggested multiplier is more than SIZE bits, we can do better
2873 for even divisors, using an initial right shift. */
2874 if (mh != 0 && (d & 1) == 0)
2876 pre_shift = ctz_or_zero (d);
2877 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2878 &ml, &post_shift, &dummy_int);
2879 gcc_assert (!mh);
2881 else
2882 pre_shift = 0;
2884 if (mh != 0)
2886 if (post_shift - 1 >= prec)
2887 return NULL;
2889 /* t1 = oprnd0 h* ml;
2890 t2 = oprnd0 - t1;
2891 t3 = t2 >> 1;
2892 t4 = t1 + t3;
2893 q = t4 >> (post_shift - 1); */
2894 t1 = vect_recog_temp_ssa_var (itype, NULL);
2895 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2896 build_int_cst (itype, ml));
2897 append_pattern_def_seq (stmt_vinfo, def_stmt);
2899 t2 = vect_recog_temp_ssa_var (itype, NULL);
2900 def_stmt
2901 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2902 append_pattern_def_seq (stmt_vinfo, def_stmt);
2904 t3 = vect_recog_temp_ssa_var (itype, NULL);
2905 def_stmt
2906 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2907 append_pattern_def_seq (stmt_vinfo, def_stmt);
2909 t4 = vect_recog_temp_ssa_var (itype, NULL);
2910 def_stmt
2911 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2913 if (post_shift != 1)
2915 append_pattern_def_seq (stmt_vinfo, def_stmt);
2917 q = vect_recog_temp_ssa_var (itype, NULL);
2918 pattern_stmt
2919 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2920 build_int_cst (itype, post_shift - 1));
2922 else
2924 q = t4;
2925 pattern_stmt = def_stmt;
2928 else
2930 if (pre_shift >= prec || post_shift >= prec)
2931 return NULL;
2933 /* t1 = oprnd0 >> pre_shift;
2934 t2 = t1 h* ml;
2935 q = t2 >> post_shift; */
2936 if (pre_shift)
2938 t1 = vect_recog_temp_ssa_var (itype, NULL);
2939 def_stmt
2940 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2941 build_int_cst (NULL, pre_shift));
2942 append_pattern_def_seq (stmt_vinfo, def_stmt);
2944 else
2945 t1 = oprnd0;
2947 t2 = vect_recog_temp_ssa_var (itype, NULL);
2948 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2949 build_int_cst (itype, ml));
2951 if (post_shift)
2953 append_pattern_def_seq (stmt_vinfo, def_stmt);
2955 q = vect_recog_temp_ssa_var (itype, NULL);
2956 def_stmt
2957 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2958 build_int_cst (itype, post_shift));
2960 else
2961 q = t2;
2963 pattern_stmt = def_stmt;
2966 else
2968 unsigned HOST_WIDE_INT ml;
2969 int post_shift;
2970 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2971 unsigned HOST_WIDE_INT abs_d;
2972 bool add = false;
2973 tree t1, t2, t3, t4;
2975 /* Give up for -1. */
2976 if (d == -1)
2977 return NULL;
2979 /* Since d might be INT_MIN, we have to cast to
2980 unsigned HOST_WIDE_INT before negating to avoid
2981 undefined signed overflow. */
2982 abs_d = (d >= 0
2983 ? (unsigned HOST_WIDE_INT) d
2984 : - (unsigned HOST_WIDE_INT) d);
2986 /* n rem d = n rem -d */
2987 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2989 d = abs_d;
2990 oprnd1 = build_int_cst (itype, abs_d);
2992 else if (HOST_BITS_PER_WIDE_INT >= prec
2993 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2994 /* This case is not handled correctly below. */
2995 return NULL;
2997 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2998 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3000 add = true;
3001 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3003 if (post_shift >= prec)
3004 return NULL;
3006 /* t1 = oprnd0 h* ml; */
3007 t1 = vect_recog_temp_ssa_var (itype, NULL);
3008 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3009 build_int_cst (itype, ml));
3011 if (add)
3013 /* t2 = t1 + oprnd0; */
3014 append_pattern_def_seq (stmt_vinfo, def_stmt);
3015 t2 = vect_recog_temp_ssa_var (itype, NULL);
3016 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3018 else
3019 t2 = t1;
3021 if (post_shift)
3023 /* t3 = t2 >> post_shift; */
3024 append_pattern_def_seq (stmt_vinfo, def_stmt);
3025 t3 = vect_recog_temp_ssa_var (itype, NULL);
3026 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3027 build_int_cst (itype, post_shift));
3029 else
3030 t3 = t2;
3032 wide_int oprnd0_min, oprnd0_max;
3033 int msb = 1;
3034 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3036 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3037 msb = 0;
3038 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3039 msb = -1;
3042 if (msb == 0 && d >= 0)
3044 /* q = t3; */
3045 q = t3;
3046 pattern_stmt = def_stmt;
3048 else
3050 /* t4 = oprnd0 >> (prec - 1);
3051 or if we know from VRP that oprnd0 >= 0
3052 t4 = 0;
3053 or if we know from VRP that oprnd0 < 0
3054 t4 = -1; */
3055 append_pattern_def_seq (stmt_vinfo, def_stmt);
3056 t4 = vect_recog_temp_ssa_var (itype, NULL);
3057 if (msb != 1)
3058 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3059 build_int_cst (itype, msb));
3060 else
3061 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3062 build_int_cst (itype, prec - 1));
3063 append_pattern_def_seq (stmt_vinfo, def_stmt);
3065 /* q = t3 - t4; or q = t4 - t3; */
3066 q = vect_recog_temp_ssa_var (itype, NULL);
3067 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3068 d < 0 ? t3 : t4);
3072 if (rhs_code == TRUNC_MOD_EXPR)
3074 tree r, t1;
3076 /* We divided. Now finish by:
3077 t1 = q * oprnd1;
3078 r = oprnd0 - t1; */
3079 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3081 t1 = vect_recog_temp_ssa_var (itype, NULL);
3082 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3083 append_pattern_def_seq (stmt_vinfo, def_stmt);
3085 r = vect_recog_temp_ssa_var (itype, NULL);
3086 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3089 /* Pattern detected. */
3090 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3092 *type_out = vectype;
3093 return pattern_stmt;
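/* For illustration: for an unsigned 32-bit division by 3, choose_multiplier
   is expected to return ml == 0xAAAAAAAB (i.e. (2^33 + 1) / 3) with mh == 0
   and post_shift == 1, so the code above emits the equivalent of

     t1 = oprnd0 h* 0xAAAAAAAB;   (high half of the 64-bit product)
     q  = t1 >> 1;

   Signed divisors and multipliers that need the mh != 0 fixup use the
   longer sequences shown above.  */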
3096 /* Function vect_recog_mixed_size_cond_pattern
3098 Try to find the following pattern:
3100 type x_t, y_t;
3101 TYPE a_T, b_T, c_T;
3102 loop:
3103 S1 a_T = x_t CMP y_t ? b_T : c_T;
3105 where type 'TYPE' is an integral type which has different size
3106 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3107 than 'type', the constants need to fit into an integer type
3108 with the same width as 'type') or results of conversion from 'type'.
3110 Input:
3112 * STMT_VINFO: The stmt from which the pattern search begins.
3114 Output:
3116 * TYPE_OUT: The type of the output of this pattern.
3118 * Return value: A new stmt that will be used to replace the pattern.
3119 Additionally a def_stmt is added.
3121 a_it = x_t CMP y_t ? b_it : c_it;
3122 a_T = (TYPE) a_it; */
3124 static gimple *
3125 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3127 gimple *last_stmt = stmt_vinfo->stmt;
3128 tree cond_expr, then_clause, else_clause;
3129 stmt_vec_info def_stmt_info;
3130 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3131 gimple *pattern_stmt, *def_stmt;
3132 vec_info *vinfo = stmt_vinfo->vinfo;
3133 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3134 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3135 bool promotion;
3136 tree comp_scalar_type;
3138 if (!is_gimple_assign (last_stmt)
3139 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3140 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3141 return NULL;
3143 cond_expr = gimple_assign_rhs1 (last_stmt);
3144 then_clause = gimple_assign_rhs2 (last_stmt);
3145 else_clause = gimple_assign_rhs3 (last_stmt);
3147 if (!COMPARISON_CLASS_P (cond_expr))
3148 return NULL;
3150 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3151 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3152 if (comp_vectype == NULL_TREE)
3153 return NULL;
3155 type = gimple_expr_type (last_stmt);
3156 if (types_compatible_p (type, comp_scalar_type)
3157 || ((TREE_CODE (then_clause) != INTEGER_CST
3158 || TREE_CODE (else_clause) != INTEGER_CST)
3159 && !INTEGRAL_TYPE_P (comp_scalar_type))
3160 || !INTEGRAL_TYPE_P (type))
3161 return NULL;
3163 if ((TREE_CODE (then_clause) != INTEGER_CST
3164 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3165 &def_stmt0, &promotion))
3166 || (TREE_CODE (else_clause) != INTEGER_CST
3167 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3168 &def_stmt1, &promotion)))
3169 return NULL;
3171 if (orig_type0 && orig_type1
3172 && !types_compatible_p (orig_type0, orig_type1))
3173 return NULL;
3175 if (orig_type0)
3177 if (!types_compatible_p (orig_type0, comp_scalar_type))
3178 return NULL;
3179 then_clause = gimple_assign_rhs1 (def_stmt0);
3180 itype = orig_type0;
3183 if (orig_type1)
3185 if (!types_compatible_p (orig_type1, comp_scalar_type))
3186 return NULL;
3187 else_clause = gimple_assign_rhs1 (def_stmt1);
3188 itype = orig_type1;
3192 HOST_WIDE_INT cmp_mode_size
3193 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3195 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3196 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3197 return NULL;
3199 vectype = get_vectype_for_scalar_type (type);
3200 if (vectype == NULL_TREE)
3201 return NULL;
3203 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3204 return NULL;
3206 if (itype == NULL_TREE)
3207 itype = build_nonstandard_integer_type (cmp_mode_size,
3208 TYPE_UNSIGNED (type));
3210 if (itype == NULL_TREE
3211 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3212 return NULL;
3214 vecitype = get_vectype_for_scalar_type (itype);
3215 if (vecitype == NULL_TREE)
3216 return NULL;
3218 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3219 return NULL;
3221 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3223 if ((TREE_CODE (then_clause) == INTEGER_CST
3224 && !int_fits_type_p (then_clause, itype))
3225 || (TREE_CODE (else_clause) == INTEGER_CST
3226 && !int_fits_type_p (else_clause, itype)))
3227 return NULL;
3230 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3231 COND_EXPR, unshare_expr (cond_expr),
3232 fold_convert (itype, then_clause),
3233 fold_convert (itype, else_clause));
3234 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3235 NOP_EXPR, gimple_assign_lhs (def_stmt));
3237 new_pattern_def_seq (stmt_vinfo, def_stmt);
3238 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3239 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3240 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3241 *type_out = vectype;
3243 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3245 return pattern_stmt;
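/* Illustrative sketch (hypothetical code, not part of this file): a loop
   that, after the usual GIMPLE simplifications, can give rise to the
   mixed-size COND_EXPR described above: the comparison is done on short
   while the stored result is int, and both constants fit in short.  */

static void
example_mixed_size_cond (int *dst, const short *x, const short *y, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = x[i] < y[i] ? 17 : 42;
}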
3249 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3250 true if bool VAR can and should be optimized that way. Assume it shouldn't
3251 in case it's a result of a comparison which can be directly vectorized into
3252 a vector comparison. Fills in STMTS with all stmts visited during the
3253 walk. */
3255 static bool
3256 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3258 tree rhs1;
3259 enum tree_code rhs_code;
3261 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3262 if (!def_stmt_info)
3263 return false;
3265 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3266 if (!def_stmt)
3267 return false;
3269 if (stmts.contains (def_stmt))
3270 return true;
3272 rhs1 = gimple_assign_rhs1 (def_stmt);
3273 rhs_code = gimple_assign_rhs_code (def_stmt);
3274 switch (rhs_code)
3276 case SSA_NAME:
3277 if (! check_bool_pattern (rhs1, vinfo, stmts))
3278 return false;
3279 break;
3281 CASE_CONVERT:
3282 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3283 return false;
3284 if (! check_bool_pattern (rhs1, vinfo, stmts))
3285 return false;
3286 break;
3288 case BIT_NOT_EXPR:
3289 if (! check_bool_pattern (rhs1, vinfo, stmts))
3290 return false;
3291 break;
3293 case BIT_AND_EXPR:
3294 case BIT_IOR_EXPR:
3295 case BIT_XOR_EXPR:
3296 if (! check_bool_pattern (rhs1, vinfo, stmts)
3297 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3298 return false;
3299 break;
3301 default:
3302 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3304 tree vecitype, comp_vectype;
3306 /* If the comparison can throw, then is_gimple_condexpr will be
3307 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3308 if (stmt_could_throw_p (def_stmt))
3309 return false;
3311 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3312 if (comp_vectype == NULL_TREE)
3313 return false;
3315 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3316 if (mask_type
3317 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3318 return false;
3320 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3322 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3323 tree itype
3324 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3325 vecitype = get_vectype_for_scalar_type (itype);
3326 if (vecitype == NULL_TREE)
3327 return false;
3329 else
3330 vecitype = comp_vectype;
3331 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3332 return false;
3334 else
3335 return false;
3336 break;
3339 bool res = stmts.add (def_stmt);
3340 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3341 gcc_assert (!res);
3343 return true;
3347 /* Helper function of adjust_bool_pattern. Build a cast of VAR to TYPE,
3348 append the cast statement to STMT_INFO's pattern definition sequence,
3349 and return the cast's result. */
3351 static tree
3352 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3354 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3355 NOP_EXPR, var);
3356 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3357 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3358 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3359 append_pattern_def_seq (stmt_info, cast_stmt);
3360 return gimple_assign_lhs (cast_stmt);
3363 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3364 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3365 type, OUT_TYPE is the desired final integer type of the whole pattern.
3366 STMT_INFO is the info of the pattern root; it is the statement with which
3367 pattern stmts should be associated. DEFS is a map of pattern defs. */
3369 static void
3370 adjust_bool_pattern (tree var, tree out_type,
3371 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3373 gimple *stmt = SSA_NAME_DEF_STMT (var);
3374 enum tree_code rhs_code, def_rhs_code;
3375 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3376 location_t loc;
3377 gimple *pattern_stmt, *def_stmt;
3378 tree trueval = NULL_TREE;
3380 rhs1 = gimple_assign_rhs1 (stmt);
3381 rhs2 = gimple_assign_rhs2 (stmt);
3382 rhs_code = gimple_assign_rhs_code (stmt);
3383 loc = gimple_location (stmt);
3384 switch (rhs_code)
3386 case SSA_NAME:
3387 CASE_CONVERT:
3388 irhs1 = *defs.get (rhs1);
3389 itype = TREE_TYPE (irhs1);
3390 pattern_stmt
3391 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3392 SSA_NAME, irhs1);
3393 break;
3395 case BIT_NOT_EXPR:
3396 irhs1 = *defs.get (rhs1);
3397 itype = TREE_TYPE (irhs1);
3398 pattern_stmt
3399 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3400 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3401 break;
3403 case BIT_AND_EXPR:
3404 /* Try to optimize x = y & (a < b ? 1 : 0); into
3405 x = (a < b ? y : 0);
3407 E.g. for:
3408 bool a_b, b_b, c_b;
3409 TYPE d_T;
3411 S1 a_b = x1 CMP1 y1;
3412 S2 b_b = x2 CMP2 y2;
3413 S3 c_b = a_b & b_b;
3414 S4 d_T = (TYPE) c_b;
3416 we would normally emit:
3418 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3419 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3420 S3' c_T = a_T & b_T;
3421 S4' d_T = c_T;
3423 but we can save one stmt by using the
3424 result of one of the COND_EXPRs in the other COND_EXPR and leave
3425 the BIT_AND_EXPR stmt out:
3427 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3428 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3429 S4' d_T = c_T;
3431 At least when VEC_COND_EXPR is implemented using masks,
3432 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3433 computes the comparison mask and ANDs it, in one case with an
3434 all-ones vector, in the other case with a vector register.
3435 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3436 often more expensive. */
3437 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3438 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3439 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3441 irhs1 = *defs.get (rhs1);
3442 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3443 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3444 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3446 rhs_code = def_rhs_code;
3447 rhs1 = def_rhs1;
3448 rhs2 = gimple_assign_rhs2 (def_stmt);
3449 trueval = irhs1;
3450 goto do_compare;
3452 else
3453 irhs2 = *defs.get (rhs2);
3454 goto and_ior_xor;
3456 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3457 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3458 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3460 irhs2 = *defs.get (rhs2);
3461 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3462 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3463 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3465 rhs_code = def_rhs_code;
3466 rhs1 = def_rhs1;
3467 rhs2 = gimple_assign_rhs2 (def_stmt);
3468 trueval = irhs2;
3469 goto do_compare;
3471 else
3472 irhs1 = *defs.get (rhs1);
3473 goto and_ior_xor;
3475 /* FALLTHRU */
3476 case BIT_IOR_EXPR:
3477 case BIT_XOR_EXPR:
3478 irhs1 = *defs.get (rhs1);
3479 irhs2 = *defs.get (rhs2);
3480 and_ior_xor:
3481 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3482 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3484 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3485 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3486 int out_prec = TYPE_PRECISION (out_type);
3487 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3488 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3489 stmt_info);
3490 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3491 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3492 stmt_info);
3493 else
3495 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3496 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3499 itype = TREE_TYPE (irhs1);
3500 pattern_stmt
3501 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3502 rhs_code, irhs1, irhs2);
3503 break;
3505 default:
3506 do_compare:
3507 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3508 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3509 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3510 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3511 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3513 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3514 itype
3515 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3517 else
3518 itype = TREE_TYPE (rhs1);
3519 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3520 if (trueval == NULL_TREE)
3521 trueval = build_int_cst (itype, 1);
3522 else
3523 gcc_checking_assert (useless_type_conversion_p (itype,
3524 TREE_TYPE (trueval)));
3525 pattern_stmt
3526 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3527 COND_EXPR, cond_expr, trueval,
3528 build_int_cst (itype, 0));
3529 break;
3532 gimple_set_location (pattern_stmt, loc);
3533 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3534 pattern def seq stmts instead of just letting auto-detection do
3535 its work? */
3536 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3537 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3538 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3539 append_pattern_def_seq (stmt_info, pattern_stmt);
3540 defs.put (var, gimple_assign_lhs (pattern_stmt));
3543 /* Comparison function to qsort a vector of gimple stmts by UID. */
3545 static int
3546 sort_after_uid (const void *p1, const void *p2)
3548 const gimple *stmt1 = *(const gimple * const *)p1;
3549 const gimple *stmt2 = *(const gimple * const *)p2;
3550 return gimple_uid (stmt1) - gimple_uid (stmt2);
3553 /* Create pattern stmts for all stmts participating in the bool pattern
3554 specified by BOOL_STMT_SET and its root STMT with the desired type
3555 OUT_TYPE. Return the def of the pattern root. */
3557 static tree
3558 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3559 tree out_type, gimple *stmt)
3561 /* Gather original stmts in the bool pattern in their order of appearance
3562 in the IL. */
3563 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3564 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3565 i != bool_stmt_set.end (); ++i)
3566 bool_stmts.quick_push (*i);
3567 bool_stmts.qsort (sort_after_uid);
3569 /* Now process them in that order, producing pattern stmts. */
3570 hash_map <tree, tree> defs;
3571 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3572 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3573 out_type, vinfo_for_stmt (stmt), defs);
3575 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3576 gimple *pattern_stmt
3577 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3578 return gimple_assign_lhs (pattern_stmt);
3581 /* Helper for search_type_for_mask. */
3583 static tree
3584 search_type_for_mask_1 (tree var, vec_info *vinfo,
3585 hash_map<gimple *, tree> &cache)
3587 tree rhs1;
3588 enum tree_code rhs_code;
3589 tree res = NULL_TREE, res2;
3591 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3592 return NULL_TREE;
3594 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3595 if (!def_stmt_info)
3596 return NULL_TREE;
3598 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3599 if (!def_stmt)
3600 return NULL_TREE;
3602 tree *c = cache.get (def_stmt);
3603 if (c)
3604 return *c;
3606 rhs_code = gimple_assign_rhs_code (def_stmt);
3607 rhs1 = gimple_assign_rhs1 (def_stmt);
3609 switch (rhs_code)
3611 case SSA_NAME:
3612 case BIT_NOT_EXPR:
3613 CASE_CONVERT:
3614 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3615 break;
3617 case BIT_AND_EXPR:
3618 case BIT_IOR_EXPR:
3619 case BIT_XOR_EXPR:
3620 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3621 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3622 cache);
3623 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3624 res = res2;
3625 break;
3627 default:
3628 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3630 tree comp_vectype, mask_type;
3632 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3634 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3635 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3636 vinfo, cache);
3637 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3638 res = res2;
3639 break;
3642 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3643 if (comp_vectype == NULL_TREE)
3645 res = NULL_TREE;
3646 break;
3649 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3650 if (!mask_type
3651 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3653 res = NULL_TREE;
3654 break;
3657 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3658 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3660 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3661 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3663 else
3664 res = TREE_TYPE (rhs1);
3668 cache.put (def_stmt, res);
3669 return res;
3672 /* Return the proper type for converting bool VAR into
3673 an integer value or NULL_TREE if no such type exists.
3674 The type is chosen so that the converted value has the
3675 same number of elements as VAR's vector type. */
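/* An illustrative example (not taken from the GCC sources): if VAR is
   ultimately defined by a comparison of two 32-bit ints,

     _Bool b_1 = x_2 < y_3;   // x_2, y_3 of type int

   then the returned type is a 32-bit unsigned integer type, so that a
   vector of the converted values has as many elements as the vector
   comparison that produces the mask.  */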
3677 static tree
3678 search_type_for_mask (tree var, vec_info *vinfo)
3680 hash_map<gimple *, tree> cache;
3681 return search_type_for_mask_1 (var, vinfo, cache);
3684 /* Function vect_recog_bool_pattern
3686 Try to find a pattern like the following:
3688 bool a_b, b_b, c_b, d_b, e_b;
3689 TYPE f_T;
3690 loop:
3691 S1 a_b = x1 CMP1 y1;
3692 S2 b_b = x2 CMP2 y2;
3693 S3 c_b = a_b & b_b;
3694 S4 d_b = x3 CMP3 y3;
3695 S5 e_b = c_b | d_b;
3696 S6 f_T = (TYPE) e_b;
3698 where type 'TYPE' is an integral type. Or a similar pattern
3699 ending in
3701 S6 f_Y = e_b ? r_Y : s_Y;
3703 as results from if-conversion of a complex condition.
3705 Input:
3707 * STMT_VINFO: The stmt at the end from which the pattern
3708 search begins, i.e. a cast of a bool to
3709 an integer type.
3711 Output:
3713 * TYPE_OUT: The type of the output of this pattern.
3715 * Return value: A new stmt that will be used to replace the pattern.
3717 Assuming the size of TYPE is the same as the size of all comparisons
3718 (otherwise some casts would be added where needed), for the above
3719 sequence we create the following related pattern stmts:
3720 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3721 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3722 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3723 S5' e_T = c_T | d_T;
3724 S6' f_T = e_T;
3726 Instead of the above S3' we could emit:
3727 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3728 S3' c_T = a_T | b_T;
3729 but the above is more efficient. */
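/* As a rough illustration (a hypothetical loop, not taken from the GCC
   testsuite), source code of the following shape typically produces the
   boolean IL above after if-conversion:

     int x1[N], y1[N], x2[N], y2[N], x3[N], y3[N], f[N];

     for (int i = 0; i < N; i++)
       f[i] = ((x1[i] < y1[i]) & (x2[i] < y2[i])) | (x3[i] < y3[i]);

   Here f[i] plays the role of f_T = (TYPE) e_b above, and the pattern
   rewrites the boolean operations into integer COND_EXPRs and bitwise
   operations of TYPE's width.  */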
3731 static gimple *
3732 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3734 gimple *last_stmt = stmt_vinfo->stmt;
3735 enum tree_code rhs_code;
3736 tree var, lhs, rhs, vectype;
3737 stmt_vec_info new_stmt_info;
3738 vec_info *vinfo = stmt_vinfo->vinfo;
3739 gimple *pattern_stmt;
3741 if (!is_gimple_assign (last_stmt))
3742 return NULL;
3744 var = gimple_assign_rhs1 (last_stmt);
3745 lhs = gimple_assign_lhs (last_stmt);
3747 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3748 return NULL;
3750 hash_set<gimple *> bool_stmts;
3752 rhs_code = gimple_assign_rhs_code (last_stmt);
3753 if (CONVERT_EXPR_CODE_P (rhs_code))
3755 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3756 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3757 return NULL;
3758 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3759 if (vectype == NULL_TREE)
3760 return NULL;
3762 if (check_bool_pattern (var, vinfo, bool_stmts))
3764 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3765 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3766 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3767 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3768 else
3769 pattern_stmt
3770 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3772 else
3774 tree type = search_type_for_mask (var, vinfo);
3775 tree cst0, cst1, tmp;
3777 if (!type)
3778 return NULL;
3780 /* We may directly use a cond with the narrowed type to avoid
3781 multiple cond exprs and the subsequent packing of their results,
3782 and instead perform a single cond with a packed mask. In the
3783 widening case it is better to do the cond first and then extract
3784 the results. */
3785 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3786 type = TREE_TYPE (lhs);
3788 cst0 = build_int_cst (type, 0);
3789 cst1 = build_int_cst (type, 1);
3790 tmp = vect_recog_temp_ssa_var (type, NULL);
3791 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3793 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3795 tree new_vectype = get_vectype_for_scalar_type (type);
3796 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3797 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3798 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3799 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3801 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3802 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3806 *type_out = vectype;
3807 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3809 return pattern_stmt;
3811 else if (rhs_code == COND_EXPR
3812 && TREE_CODE (var) == SSA_NAME)
3814 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3815 if (vectype == NULL_TREE)
3816 return NULL;
3818 /* Build a scalar type for the boolean result that when
3819 vectorized matches the vector type of the result in
3820 size and number of elements. */
3821 unsigned prec
3822 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3823 TYPE_VECTOR_SUBPARTS (vectype));
3825 tree type
3826 = build_nonstandard_integer_type (prec,
3827 TYPE_UNSIGNED (TREE_TYPE (var)));
3828 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3829 return NULL;
3831 if (!check_bool_pattern (var, vinfo, bool_stmts))
3832 return NULL;
3834 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3836 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3837 pattern_stmt
3838 = gimple_build_assign (lhs, COND_EXPR,
3839 build2 (NE_EXPR, boolean_type_node,
3840 rhs, build_int_cst (type, 0)),
3841 gimple_assign_rhs2 (last_stmt),
3842 gimple_assign_rhs3 (last_stmt));
3843 *type_out = vectype;
3844 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3846 return pattern_stmt;
3848 else if (rhs_code == SSA_NAME
3849 && STMT_VINFO_DATA_REF (stmt_vinfo))
3851 stmt_vec_info pattern_stmt_info;
3852 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3853 gcc_assert (vectype != NULL_TREE);
3854 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3855 return NULL;
3857 if (check_bool_pattern (var, vinfo, bool_stmts))
3858 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3859 else
3861 tree type = search_type_for_mask (var, vinfo);
3862 tree cst0, cst1, new_vectype;
3864 if (!type)
3865 return NULL;
3867 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3868 type = TREE_TYPE (vectype);
3870 cst0 = build_int_cst (type, 0);
3871 cst1 = build_int_cst (type, 1);
3872 new_vectype = get_vectype_for_scalar_type (type);
3874 rhs = vect_recog_temp_ssa_var (type, NULL);
3875 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3877 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3878 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3879 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3880 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3883 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3884 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3886 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3887 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3888 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3889 rhs = rhs2;
3891 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3892 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3893 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3894 STMT_VINFO_DATA_REF (pattern_stmt_info)
3895 = STMT_VINFO_DATA_REF (stmt_vinfo);
3896 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3897 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3898 *type_out = vectype;
3899 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3901 return pattern_stmt;
3903 else
3904 return NULL;
3908 /* A helper for vect_recog_mask_conversion_pattern. Build a
3909 conversion of MASK to a type suitable for masking VECTYPE.
3910 The built statement gets the required vectype and is appended to
3911 the pattern sequence of STMT_VINFO.
3913 Return the converted mask. */
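/* For example (an illustrative case, with a made-up SSA name), the
   statement built here is just a scalar conversion such as

     patt_4 = (_Bool) mask_3;

   but it is given the boolean vector type with VECTYPE's number of
   elements as its vectype, so that vectorizing it produces a mask with
   the right number of elements for masking VECTYPE.  */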
3915 static tree
3916 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3917 vec_info *vinfo)
3919 gimple *stmt;
3920 tree masktype, tmp;
3921 stmt_vec_info new_stmt_info;
3923 masktype = build_same_sized_truth_vector_type (vectype);
3924 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3925 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3926 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3927 set_vinfo_for_stmt (stmt, new_stmt_info);
3928 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3929 append_pattern_def_seq (stmt_vinfo, stmt);
3931 return tmp;
3935 /* Function vect_recog_mask_conversion_pattern
3937 Try to find statements that require boolean type
3938 conversion. Additional conversion statements are
3939 added to handle such cases. For example:
3941 bool m_1, m_2, m_3;
3942 int i_4, i_5;
3943 double d_6, d_7;
3944 char c_1, c_2, c_3;
3946 S1 m_1 = i_4 > i_5;
3947 S2 m_2 = d_6 < d_7;
3948 S3 m_3 = m_1 & m_2;
3949 S4 c_1 = m_3 ? c_2 : c_3;
3951 Will be transformed into:
3953 S1 m_1 = i_4 > i_5;
3954 S2 m_2 = d_6 < d_7;
3955 S3'' m_2' = (_Bool[bitsize=32])m_2
3956 S3' m_3' = m_1 & m_2';
3957 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3958 S4' c_1' = m_3'' ? c_2 : c_3; */
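/* A hypothetical source loop (not from the GCC testsuite) that leads to
   the IL above mixes comparisons whose natural mask types have different
   numbers of elements per vector, e.g. an int and a double comparison:

     void
     f (int *i_4, int *i_5, double *d_6, double *d_7,
        char *c_1, char *c_2, char *c_3, int n)
     {
       for (int j = 0; j < n; j++)
         c_1[j] = (i_4[j] > i_5[j] && d_6[j] < d_7[j]) ? c_2[j] : c_3[j];
     }

   The double comparison yields fewer mask elements per vector than the
   int comparison, so the conversions S3'' and S4'' above are added.  */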
3960 static gimple *
3961 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3963 gimple *last_stmt = stmt_vinfo->stmt;
3964 enum tree_code rhs_code;
3965 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3966 tree vectype1, vectype2;
3967 stmt_vec_info pattern_stmt_info;
3968 vec_info *vinfo = stmt_vinfo->vinfo;
3970 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3971 if (is_gimple_call (last_stmt)
3972 && gimple_call_internal_p (last_stmt)
3973 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3974 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3976 gcall *pattern_stmt;
3977 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3979 if (load)
3981 lhs = gimple_call_lhs (last_stmt);
3982 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3984 else
3986 rhs2 = gimple_call_arg (last_stmt, 3);
3987 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3990 rhs1 = gimple_call_arg (last_stmt, 2);
3991 rhs1_type = search_type_for_mask (rhs1, vinfo);
3992 if (!rhs1_type)
3993 return NULL;
3994 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3996 if (!vectype1 || !vectype2
3997 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3998 TYPE_VECTOR_SUBPARTS (vectype2)))
3999 return NULL;
4001 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4003 if (load)
4005 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4006 pattern_stmt
4007 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
4008 gimple_call_arg (last_stmt, 0),
4009 gimple_call_arg (last_stmt, 1),
4010 tmp);
4011 gimple_call_set_lhs (pattern_stmt, lhs);
4013 else
4014 pattern_stmt
4015 = gimple_build_call_internal (IFN_MASK_STORE, 4,
4016 gimple_call_arg (last_stmt, 0),
4017 gimple_call_arg (last_stmt, 1),
4018 tmp,
4019 gimple_call_arg (last_stmt, 3));
4021 gimple_call_set_nothrow (pattern_stmt, true);
4023 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4024 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4025 STMT_VINFO_DATA_REF (pattern_stmt_info)
4026 = STMT_VINFO_DATA_REF (stmt_vinfo);
4027 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4028 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4029 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4030 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
4032 *type_out = vectype1;
4033 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4035 return pattern_stmt;
4038 if (!is_gimple_assign (last_stmt))
4039 return NULL;
4041 gimple *pattern_stmt;
4042 lhs = gimple_assign_lhs (last_stmt);
4043 rhs1 = gimple_assign_rhs1 (last_stmt);
4044 rhs_code = gimple_assign_rhs_code (last_stmt);
4046 /* Check for cond expression requiring mask conversion. */
4047 if (rhs_code == COND_EXPR)
4049 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4051 if (TREE_CODE (rhs1) == SSA_NAME)
4053 rhs1_type = search_type_for_mask (rhs1, vinfo);
4054 if (!rhs1_type)
4055 return NULL;
4057 else if (COMPARISON_CLASS_P (rhs1))
4059 /* Check whether we're comparing scalar booleans and (if so)
4060 whether a better mask type exists than the mask associated
4061 with boolean-sized elements. This avoids unnecessary packs
4062 and unpacks if the booleans are set from comparisons of
4063 wider types. E.g. in:
4065 int x1, x2, x3, x4, y1, y2;
4067 bool b1 = (x1 == x2);
4068 bool b2 = (x3 == x4);
4069 ... = b1 == b2 ? y1 : y2;
4071 it is better for b1 and b2 to use the mask type associated
4072 with int elements rather than bool (byte) elements. */
4073 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4074 if (!rhs1_type)
4075 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4077 else
4078 return NULL;
4080 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4082 if (!vectype1 || !vectype2)
4083 return NULL;
4085 /* Continue if a conversion is needed. Also continue if we have
4086 a comparison whose vector type would normally be different from
4087 VECTYPE2 when considered in isolation. In that case we'll
4088 replace the comparison with an SSA name (so that we can record
4089 its vector type) and behave as though the comparison was an SSA
4090 name from the outset. */
4091 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4092 TYPE_VECTOR_SUBPARTS (vectype2))
4093 && (TREE_CODE (rhs1) == SSA_NAME
4094 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4095 return NULL;
4097 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4098 in place; vectorizable_condition can handle it. This avoids
4099 unnecessary promotion stmts and an increased vectorization factor. */
4100 if (COMPARISON_CLASS_P (rhs1)
4101 && INTEGRAL_TYPE_P (rhs1_type)
4102 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4103 TYPE_VECTOR_SUBPARTS (vectype2)))
4105 enum vect_def_type dt;
4106 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4107 && dt == vect_external_def
4108 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4109 && (dt == vect_external_def
4110 || dt == vect_constant_def))
4112 tree wide_scalar_type = build_nonstandard_integer_type
4113 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4114 TYPE_UNSIGNED (rhs1_type));
4115 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4116 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4117 return NULL;
4121 /* If rhs1 is a comparison we need to move it into a
4122 separate statement. */
4123 if (TREE_CODE (rhs1) != SSA_NAME)
4125 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4126 pattern_stmt = gimple_build_assign (tmp, rhs1);
4127 rhs1 = tmp;
4129 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4130 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4131 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4132 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4135 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4136 TYPE_VECTOR_SUBPARTS (vectype2)))
4137 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4138 else
4139 tmp = rhs1;
4141 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4142 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4143 gimple_assign_rhs2 (last_stmt),
4144 gimple_assign_rhs3 (last_stmt));
4146 *type_out = vectype1;
4147 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4149 return pattern_stmt;
4152 /* Now check for binary boolean operations requiring conversion for
4153 one of the operands. */
4154 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4155 return NULL;
4157 if (rhs_code != BIT_IOR_EXPR
4158 && rhs_code != BIT_XOR_EXPR
4159 && rhs_code != BIT_AND_EXPR
4160 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4161 return NULL;
4163 rhs2 = gimple_assign_rhs2 (last_stmt);
4165 rhs1_type = search_type_for_mask (rhs1, vinfo);
4166 rhs2_type = search_type_for_mask (rhs2, vinfo);
4168 if (!rhs1_type || !rhs2_type
4169 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4170 return NULL;
4172 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4174 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4175 if (!vectype1)
4176 return NULL;
4177 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4179 else
4181 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4182 if (!vectype1)
4183 return NULL;
4184 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4187 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4188 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4190 *type_out = vectype1;
4191 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4193 return pattern_stmt;
4196 /* STMT is a load or store. If the load or store is conditional, return
4197 the boolean condition under which it occurs; otherwise return null. */
4199 static tree
4200 vect_get_load_store_mask (gimple *stmt)
4202 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4204 gcc_assert (gimple_assign_single_p (def_assign));
4205 return NULL_TREE;
4208 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4210 internal_fn ifn = gimple_call_internal_fn (def_call);
4211 int mask_index = internal_fn_mask_index (ifn);
4212 return gimple_call_arg (def_call, mask_index);
4215 gcc_unreachable ();
4218 /* Return the scalar offset type that an internal gather/scatter function
4219 should use. GS_INFO describes the gather/scatter operation. */
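/* For example (an illustrative case): for a gather such as

     double *base; int *idx;
     ... = base[idx[i]];

   the elements are 64 bits wide but the offsets only 32, so the offsets
   are extended to a 64-bit integer type with int's signedness before
   being handed to the internal function.  */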
4221 static tree
4222 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4224 tree offset_type = TREE_TYPE (gs_info->offset);
4225 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4227 /* Enforced by vect_check_gather_scatter. */
4228 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4229 gcc_assert (element_bits >= offset_bits);
4231 /* If the offset is narrower than the elements, extend it according
4232 to its sign. */
4233 if (element_bits > offset_bits)
4234 return build_nonstandard_integer_type (element_bits,
4235 TYPE_UNSIGNED (offset_type));
4237 return offset_type;
4240 /* Return MASK if MASK is suitable for masking an operation on vectors
4241 of type VECTYPE, otherwise convert it into such a form and return
4242 the result. Associate any conversion statements with STMT_INFO's
4243 pattern. */
4245 static tree
4246 vect_convert_mask_for_vectype (tree mask, tree vectype,
4247 stmt_vec_info stmt_info, vec_info *vinfo)
4249 tree mask_type = search_type_for_mask (mask, vinfo);
4250 if (mask_type)
4252 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4253 if (mask_vectype
4254 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4255 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4256 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4258 return mask;
4261 /* Return the equivalent of:
4263 fold_convert (TYPE, VALUE)
4265 with the expectation that the operation will be vectorized.
4266 If new statements are needed, add them as pattern statements
4267 to STMT_INFO. */
4269 static tree
4270 vect_add_conversion_to_patterm (tree type, tree value,
4271 stmt_vec_info stmt_info,
4272 vec_info *vinfo)
4274 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4275 return value;
4277 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4278 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4279 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4280 set_vinfo_for_stmt (conversion, new_stmt_info);
4281 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4282 append_pattern_def_seq (stmt_info, conversion);
4283 return new_value;
4286 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4287 internal function. Return the final statement on success and set
4288 *TYPE_OUT to the vector type being loaded or stored.
4290 This function only handles gathers and scatters that were recognized
4291 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
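/* A hypothetical example (not from the GCC testsuite): a conditional
   gather such as

     for (int i = 0; i < n; i++)
       if (cond[i])
         out[i] = base[idx[i]];

   becomes, on targets providing a masked gather internal function, a call
   with the invariant base, the (possibly widened) offsets, the scale and
   the converted mask as arguments; unconditional accesses simply omit the
   mask argument.  */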
4293 static gimple *
4294 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4296 /* Currently we only support this for loop vectorization. */
4297 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4298 if (!loop_vinfo)
4299 return NULL;
4301 /* Make sure that we're looking at a gather load or scatter store. */
4302 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4303 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4304 return NULL;
4306 /* Get the boolean that controls whether the load or store happens.
4307 This is null if the operation is unconditional. */
4308 gimple *stmt = stmt_info->stmt;
4309 tree mask = vect_get_load_store_mask (stmt);
4311 /* Make sure that the target supports an appropriate internal
4312 function for the gather/scatter operation. */
4313 gather_scatter_info gs_info;
4314 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4315 || gs_info.decl)
4316 return NULL;
4318 /* Convert the mask to the right form. */
4319 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4320 if (mask)
4321 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4322 loop_vinfo);
4324 /* Get the invariant base and non-invariant offset, converting the
4325 latter to the same width as the vector elements. */
4326 tree base = gs_info.base;
4327 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4328 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4329 stmt_info, loop_vinfo);
4331 /* Build the new pattern statement. */
4332 tree scale = size_int (gs_info.scale);
4333 gcall *pattern_stmt;
4334 if (DR_IS_READ (dr))
4336 if (mask != NULL)
4337 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4338 offset, scale, mask);
4339 else
4340 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4341 offset, scale);
4342 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4343 gimple_call_set_lhs (pattern_stmt, load_lhs);
4345 else
4347 tree rhs = vect_get_store_rhs (stmt);
4348 if (mask != NULL)
4349 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4350 base, offset, scale, rhs,
4351 mask);
4352 else
4353 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4354 base, offset, scale, rhs);
4356 gimple_call_set_nothrow (pattern_stmt, true);
4358 /* Copy across relevant vectorization info and associate DR with the
4359 new pattern statement instead of the original statement. */
4360 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4361 loop_vinfo);
4362 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4363 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4364 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4365 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4366 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4367 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4369 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4370 *type_out = vectype;
4371 vect_pattern_detected ("gather/scatter pattern", stmt);
4373 return pattern_stmt;
4376 /* Return true if TYPE is a non-boolean integer type. These are the types
4377 that we want to consider for narrowing. */
4379 static bool
4380 vect_narrowable_type_p (tree type)
4382 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4385 /* Return true if the operation given by CODE can be truncated to N bits
4386 when only N bits of the output are needed. This is only true if bit N+1
4387 of the inputs has no effect on the low N bits of the result. */
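/* For example, addition is truncatable because

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   holds for all a and b, whereas a right shift or a division is not:
   bits above the low N bits of the inputs can affect the low N bits of
   the result.  */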
4389 static bool
4390 vect_truncatable_operation_p (tree_code code)
4392 switch (code)
4394 case PLUS_EXPR:
4395 case MINUS_EXPR:
4396 case MULT_EXPR:
4397 case BIT_AND_EXPR:
4398 case BIT_IOR_EXPR:
4399 case BIT_XOR_EXPR:
4400 case COND_EXPR:
4401 return true;
4403 default:
4404 return false;
4408 /* Record that STMT_INFO could be changed from operating on TYPE to
4409 operating on a type with the precision and sign given by PRECISION
4410 and SIGN respectively. PRECISION is an arbitrary bit precision;
4411 it might not be a whole number of bytes. */
4413 static void
4414 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4415 unsigned int precision, signop sign)
4417 /* Round the precision up to a whole number of bytes. */
4418 precision = vect_element_precision (precision);
4419 if (precision < TYPE_PRECISION (type)
4420 && (!stmt_info->operation_precision
4421 || stmt_info->operation_precision > precision))
4423 stmt_info->operation_precision = precision;
4424 stmt_info->operation_sign = sign;
4428 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4429 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4430 is an arbitrary bit precision; it might not be a whole number of bytes. */
4432 static void
4433 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4434 unsigned int min_input_precision)
4436 /* This operation in isolation only requires the inputs to have
4437 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4438 that MIN_INPUT_PRECISION is a natural precision for the chain
4439 as a whole. E.g. consider something like:
4441 unsigned short *x, *y;
4442 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4444 The right shift can be done on unsigned chars, and only requires the
4445 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4446 approach would mean turning a natural chain of single-vector unsigned
4447 short operations into one that truncates "*x" and then extends
4448 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4449 operation and one vector for each unsigned char operation.
4450 This would be a significant pessimization.
4452 Instead only propagate the maximum of this precision and the precision
4453 required by the users of the result. This means that we don't pessimize
4454 the case above but continue to optimize things like:
4456 unsigned char *y;
4457 unsigned short *x;
4458 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4460 Here we would truncate two vectors of *x to a single vector of
4461 unsigned chars and use single-vector unsigned char operations for
4462 everything else, rather than doing two unsigned short copies of
4463 "(*x & 0xf0) >> 4" and then truncating the result. */
4464 min_input_precision = MAX (min_input_precision,
4465 stmt_info->min_output_precision);
4467 if (min_input_precision < TYPE_PRECISION (type)
4468 && (!stmt_info->min_input_precision
4469 || stmt_info->min_input_precision > min_input_precision))
4470 stmt_info->min_input_precision = min_input_precision;
4473 /* Subroutine of vect_determine_min_output_precision. Return true if
4474 we can calculate a reduced number of output bits for STMT_INFO,
4475 whose result is LHS. */
4477 static bool
4478 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4480 /* Take the maximum precision required by users of the result. */
4481 unsigned int precision = 0;
4482 imm_use_iterator iter;
4483 use_operand_p use;
4484 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4486 gimple *use_stmt = USE_STMT (use);
4487 if (is_gimple_debug (use_stmt))
4488 continue;
4489 if (!vect_stmt_in_region_p (stmt_info->vinfo, use_stmt))
4490 return false;
4491 stmt_vec_info use_stmt_info = vinfo_for_stmt (use_stmt);
4492 if (!use_stmt_info->min_input_precision)
4493 return false;
4494 precision = MAX (precision, use_stmt_info->min_input_precision);
4497 if (dump_enabled_p ())
4499 dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
4500 precision);
4501 dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
4502 dump_printf (MSG_NOTE, " are significant\n");
4504 stmt_info->min_output_precision = precision;
4505 return true;
4508 /* Calculate min_output_precision for STMT_INFO. */
4510 static void
4511 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4513 /* We're only interested in statements with a narrowable result. */
4514 tree lhs = gimple_get_lhs (stmt_info->stmt);
4515 if (!lhs
4516 || TREE_CODE (lhs) != SSA_NAME
4517 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4518 return;
4520 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4521 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4524 /* Use range information to decide whether STMT (described by STMT_INFO)
4525 could be done in a narrower type. This is effectively a forward
4526 propagation, since it uses context-independent information that applies
4527 to all users of an SSA name. */
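/* A hypothetical example (not from the GCC testsuite):

     unsigned char *a, *b;
     int tmp = a[i] + b[i];

   Here the result is known to lie in [0, 510], so the addition could be
   carried out in 16-bit unsigned arithmetic even though the IL computes
   an int.  */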
4529 static void
4530 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4532 tree lhs = gimple_assign_lhs (stmt);
4533 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4534 return;
4536 tree type = TREE_TYPE (lhs);
4537 if (!vect_narrowable_type_p (type))
4538 return;
4540 /* First see whether we have any useful range information for the result. */
4541 unsigned int precision = TYPE_PRECISION (type);
4542 signop sign = TYPE_SIGN (type);
4543 wide_int min_value, max_value;
4544 if (!vect_get_range_info (lhs, &min_value, &max_value))
4545 return;
4547 tree_code code = gimple_assign_rhs_code (stmt);
4548 unsigned int nops = gimple_num_ops (stmt);
4550 if (!vect_truncatable_operation_p (code))
4551 /* Check that all relevant input operands are compatible, and update
4552 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4553 for (unsigned int i = 1; i < nops; ++i)
4555 tree op = gimple_op (stmt, i);
4556 if (TREE_CODE (op) == INTEGER_CST)
4558 /* Don't require the integer to have TYPE (which it might
4559 not for things like shift amounts, etc.), but do require it
4560 to fit TYPE. */
4561 if (!int_fits_type_p (op, type))
4562 return;
4564 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4565 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4567 else if (TREE_CODE (op) == SSA_NAME)
4569 /* Ignore codes that don't take uniform arguments. */
4570 if (!types_compatible_p (TREE_TYPE (op), type))
4571 return;
4573 wide_int op_min_value, op_max_value;
4574 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4575 return;
4577 min_value = wi::min (min_value, op_min_value, sign);
4578 max_value = wi::max (max_value, op_max_value, sign);
4580 else
4581 return;
4584 /* Try to switch signed types for unsigned types if we can.
4585 This is better for two reasons. First, unsigned ops tend
4586 to be cheaper than signed ops. Second, it means that we can
4587 handle things like:
4589 signed char c;
4590 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4592 as:
4594 signed char c;
4595 unsigned short res_1 = (unsigned short) c & 0xff00;
4596 int res = (int) res_1;
4598 where the intermediate result res_1 has unsigned rather than
4599 signed type. */
4600 if (sign == SIGNED && !wi::neg_p (min_value))
4601 sign = UNSIGNED;
4603 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4604 unsigned int precision1 = wi::min_precision (min_value, sign);
4605 unsigned int precision2 = wi::min_precision (max_value, sign);
4606 unsigned int value_precision = MAX (precision1, precision2);
4607 if (value_precision >= precision)
4608 return;
4610 if (dump_enabled_p ())
4612 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4613 " without loss of precision: ",
4614 sign == SIGNED ? "signed" : "unsigned",
4615 value_precision);
4616 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4619 vect_set_operation_type (stmt_info, type, value_precision, sign);
4620 vect_set_min_input_precision (stmt_info, type, value_precision);
4623 /* Use information about the users of STMT's result to decide whether
4624 STMT (described by STMT_INFO) could be done in a narrower type.
4625 This is effectively a backward propagation. */
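/* A hypothetical example (not from the GCC testsuite):

     unsigned char *out;
     unsigned int x, y;
     out[i] = (x << 3) | y;

   Only the low 8 bits of the result are stored, so the shift needs only
   8 bits of operation precision and, by the LSHIFT_EXPR rule below, only
   the low 5 bits of x are required as input.  */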
4627 static void
4628 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4630 tree_code code = gimple_assign_rhs_code (stmt);
4631 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4632 tree type = TREE_TYPE (gimple_op (stmt, opno));
4633 if (!vect_narrowable_type_p (type))
4634 return;
4636 unsigned int precision = TYPE_PRECISION (type);
4637 unsigned int operation_precision, min_input_precision;
4638 switch (code)
4640 CASE_CONVERT:
4641 /* Only the bits that contribute to the output matter. Don't change
4642 the precision of the operation itself. */
4643 operation_precision = precision;
4644 min_input_precision = stmt_info->min_output_precision;
4645 break;
4647 case LSHIFT_EXPR:
4648 case RSHIFT_EXPR:
4650 tree shift = gimple_assign_rhs2 (stmt);
4651 if (TREE_CODE (shift) != INTEGER_CST
4652 || !wi::ltu_p (wi::to_widest (shift), precision))
4653 return;
4654 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4655 if (code == LSHIFT_EXPR)
4657 /* We need CONST_SHIFT fewer bits of the input. */
4658 operation_precision = stmt_info->min_output_precision;
4659 min_input_precision = (MAX (operation_precision, const_shift)
4660 - const_shift);
4662 else
4664 /* We need CONST_SHIFT extra bits to do the operation. */
4665 operation_precision = (stmt_info->min_output_precision
4666 + const_shift);
4667 min_input_precision = operation_precision;
4669 break;
4672 default:
4673 if (vect_truncatable_operation_p (code))
4675 /* Input bit N has no effect on output bits N-1 and lower. */
4676 operation_precision = stmt_info->min_output_precision;
4677 min_input_precision = operation_precision;
4678 break;
4680 return;
4683 if (operation_precision < precision)
4685 if (dump_enabled_p ())
4687 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4688 " without affecting users: ",
4689 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4690 operation_precision);
4691 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4693 vect_set_operation_type (stmt_info, type, operation_precision,
4694 TYPE_SIGN (type));
4696 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4699 /* Handle vect_determine_precisions for STMT_INFO, given that we
4700 have already done so for the users of its result. */
4702 void
4703 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4705 vect_determine_min_output_precision (stmt_info);
4706 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4708 vect_determine_precisions_from_range (stmt_info, stmt);
4709 vect_determine_precisions_from_users (stmt_info, stmt);
4713 /* Walk backwards through the vectorizable region to determine the
4714 values of these fields:
4716 - min_output_precision
4717 - min_input_precision
4718 - operation_precision
4719 - operation_sign. */
4721 void
4722 vect_determine_precisions (vec_info *vinfo)
4724 DUMP_VECT_SCOPE ("vect_determine_precisions");
4726 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4728 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4729 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4730 unsigned int nbbs = loop->num_nodes;
4732 for (unsigned int i = 0; i < nbbs; i++)
4734 basic_block bb = bbs[nbbs - i - 1];
4735 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4736 !gsi_end_p (si); gsi_prev (&si))
4737 vect_determine_stmt_precisions (vinfo_for_stmt (gsi_stmt (si)));
4740 else
4742 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4743 gimple_stmt_iterator si = bb_vinfo->region_end;
4744 gimple *stmt;
4747 if (!gsi_stmt (si))
4748 si = gsi_last_bb (bb_vinfo->bb);
4749 else
4750 gsi_prev (&si);
4751 stmt = gsi_stmt (si);
4752 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4753 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4754 vect_determine_stmt_precisions (stmt_info);
4756 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4760 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4762 struct vect_recog_func
4764 vect_recog_func_ptr fn;
4765 const char *name;
4768 /* Note that ordering matters - the first pattern matching on a stmt is
4769 taken, which means the more complex one usually needs to precede the
4770 less complex ones (widen_sum only after dot_prod or sad, for example). */
4771 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4772 { vect_recog_over_widening_pattern, "over_widening" },
4773 /* Must come after over_widening, which narrows the shift as much as
4774 possible beforehand. */
4775 { vect_recog_average_pattern, "average" },
4776 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4777 { vect_recog_widen_mult_pattern, "widen_mult" },
4778 { vect_recog_dot_prod_pattern, "dot_prod" },
4779 { vect_recog_sad_pattern, "sad" },
4780 { vect_recog_widen_sum_pattern, "widen_sum" },
4781 { vect_recog_pow_pattern, "pow" },
4782 { vect_recog_widen_shift_pattern, "widen_shift" },
4783 { vect_recog_rotate_pattern, "rotate" },
4784 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4785 { vect_recog_divmod_pattern, "divmod" },
4786 { vect_recog_mult_pattern, "mult" },
4787 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4788 { vect_recog_bool_pattern, "bool" },
4789 /* This must come before mask conversion, and includes the parts
4790 of mask conversion that are needed for gather and scatter
4791 internal functions. */
4792 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4793 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4796 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4798 /* Mark statements that are involved in a pattern. */
4800 static inline void
4801 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4802 tree pattern_vectype)
4804 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4805 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4807 bool old_pattern_p = is_pattern_stmt_p (orig_stmt_info);
4808 if (old_pattern_p)
4810 /* We're replacing a statement in an existing pattern definition
4811 sequence. */
4812 if (dump_enabled_p ())
4814 dump_printf_loc (MSG_NOTE, vect_location,
4815 "replacing earlier pattern ");
4816 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_stmt, 0);
4819 /* To keep the book-keeping simple, just swap the lhs of the
4820 old and new statements, so that the old one has a valid but
4821 unused lhs. */
4822 tree old_lhs = gimple_get_lhs (orig_stmt);
4823 gimple_set_lhs (orig_stmt, gimple_get_lhs (pattern_stmt));
4824 gimple_set_lhs (pattern_stmt, old_lhs);
4826 if (dump_enabled_p ())
4828 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4829 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4832 /* Switch to the statement that ORIG replaces. */
4833 orig_stmt_info
4834 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (orig_stmt_info));
4836 /* We shouldn't be replacing the main pattern statement. */
4837 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) != orig_stmt);
4840 if (def_seq)
4841 for (gimple_stmt_iterator si = gsi_start (def_seq);
4842 !gsi_end_p (si); gsi_next (&si))
4843 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4845 if (old_pattern_p)
4847 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4849 /* Insert all the new pattern statements before the original one. */
4850 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4851 gimple_stmt_iterator gsi = gsi_for_stmt (orig_stmt, orig_def_seq);
4852 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4853 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4855 /* Remove the pattern statement that this new pattern replaces. */
4856 gsi_remove (&gsi, false);
4858 else
4859 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4862 /* Function vect_pattern_recog_1
4864 Input:
4865 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4866 computation pattern.
4867 STMT: A stmt from which the pattern search should start.
4869 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4870 a sequence of statements that has the same functionality and can be
4871 used to replace STMT. It returns the last statement in the sequence
4872 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4873 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4874 statement, having first checked that the target supports the new operation
4875 in that type.
4877 This function also does some bookkeeping, as explained in the documentation
4878 for vect_pattern_recog. */
4880 static void
4881 vect_pattern_recog_1 (vect_recog_func *recog_func, gimple_stmt_iterator si)
4883 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4884 stmt_vec_info stmt_info;
4885 loop_vec_info loop_vinfo;
4886 tree pattern_vectype;
4888 /* If this statement has already been replaced with pattern statements,
4889 leave the original statement alone, since the first match wins.
4890 Instead try to match against the definition statements that feed
4891 the main pattern statement. */
4892 stmt_info = vinfo_for_stmt (stmt);
4893 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4895 gimple_stmt_iterator gsi;
4896 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4897 !gsi_end_p (gsi); gsi_next (&gsi))
4898 vect_pattern_recog_1 (recog_func, gsi);
4899 return;
4902 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4903 if (!pattern_stmt)
4905 /* Clear any half-formed pattern definition sequence. */
4906 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4907 return;
4910 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4911 gcc_assert (pattern_vectype);
4913 /* Found a vectorizable pattern. */
4914 if (dump_enabled_p ())
4916 dump_printf_loc (MSG_NOTE, vect_location,
4917 "%s pattern recognized: ", recog_func->name);
4918 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4921 /* Mark the stmts that are involved in the pattern. */
4922 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4924 /* Patterns cannot be vectorized using SLP, because they change the order of
4925 computation. */
4926 if (loop_vinfo)
4928 unsigned ix, ix2;
4929 gimple **elem_ptr;
4930 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4931 elem_ptr, *elem_ptr == stmt);
4936 /* Function vect_pattern_recog
4938 Input:
4939 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
4940 computation idioms.
4942 Output - for each computation idiom that is detected we create a new stmt
4943 that provides the same functionality and that can be vectorized. We
4944 also record some information in the stmt_vec_info of the relevant
4945 stmts, as explained below:
4947 At the entry to this function we have the following stmts, with the
4948 following initial value in the STMT_VINFO fields:
4950 stmt in_pattern_p related_stmt vec_stmt
4951 S1: a_i = .... - - -
4952 S2: a_2 = ..use(a_i).. - - -
4953 S3: a_1 = ..use(a_2).. - - -
4954 S4: a_0 = ..use(a_1).. - - -
4955 S5: ... = ..use(a_0).. - - -
4957 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4958 represented by a single stmt. We then:
4959 - create a new stmt S6 equivalent to the pattern (the stmt is not
4960 inserted into the code)
4961 - fill in the STMT_VINFO fields as follows:
4963 in_pattern_p related_stmt vec_stmt
4964 S1: a_i = .... - - -
4965 S2: a_2 = ..use(a_i).. - - -
4966 S3: a_1 = ..use(a_2).. - - -
4967 S4: a_0 = ..use(a_1).. true S6 -
4968 '---> S6: a_new = .... - S4 -
4969 S5: ... = ..use(a_0).. - - -
4971 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4972 to each other through the RELATED_STMT field).
4974 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4975 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4976 remain irrelevant unless used by stmts other than S4.
4978 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4979 (because they are marked as irrelevant). It will vectorize S6, and record
4980 a pointer to the new vector stmt VS6 from S6 (as usual).
4981 S4 will be skipped, and S5 will be vectorized as usual:
4983 in_pattern_p related_stmt vec_stmt
4984 S1: a_i = .... - - -
4985 S2: a_2 = ..use(a_i).. - - -
4986 S3: a_1 = ..use(a_2).. - - -
4987 > VS6: va_new = .... - - -
4988 S4: a_0 = ..use(a_1).. true S6 VS6
4989 '---> S6: a_new = .... - S4 VS6
4990 > VS5: ... = ..vuse(va_new).. - - -
4991 S5: ... = ..use(a_0).. - - -
4993 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4994 elsewhere), and we'll end up with:
4996 VS6: va_new = ....
4997 VS5: ... = ..vuse(va_new)..
4999 In case of more than one pattern statements, e.g., widen-mult with
5000 intermediate type:
5002 S1 a_t = ;
5003 S2 a_T = (TYPE) a_t;
5004 '--> S3: a_it = (interm_type) a_t;
5005 S4 prod_T = a_T * CONST;
5006 '--> S5: prod_T' = a_it w* CONST;
5008 there may be other users of a_T outside the pattern. In that case S2 will
5009 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5010 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5011 be recorded in S3. */
5013 void
5014 vect_pattern_recog (vec_info *vinfo)
5016 struct loop *loop;
5017 basic_block *bbs;
5018 unsigned int nbbs;
5019 gimple_stmt_iterator si;
5020 unsigned int i, j;
5022 vect_determine_precisions (vinfo);
5024 DUMP_VECT_SCOPE ("vect_pattern_recog");
5026 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5028 loop = LOOP_VINFO_LOOP (loop_vinfo);
5029 bbs = LOOP_VINFO_BBS (loop_vinfo);
5030 nbbs = loop->num_nodes;
5032 /* Scan through the loop stmts, applying the pattern recognition
5033 functions starting at each stmt visited: */
5034 for (i = 0; i < nbbs; i++)
5036 basic_block bb = bbs[i];
5037 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5038 /* Scan over all generic vect_recog_xxx_pattern functions. */
5039 for (j = 0; j < NUM_PATTERNS; j++)
5040 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si);
5043 else
5045 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5046 for (si = bb_vinfo->region_begin;
5047 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5049 gimple *stmt = gsi_stmt (si);
5050 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5051 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5052 continue;
5054 /* Scan over all generic vect_recog_xxx_pattern functions. */
5055 for (j = 0; j < NUM_PATTERNS; j++)
5056 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si);