1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
56 value_range_kind vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
71 return true;
73 else
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
80 return false;
84 /* Report that we've found an instance of pattern PATTERN in
85 statement STMT. */
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
90 if (dump_enabled_p ())
91 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
94 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
95 return the pattern statement's stmt_vec_info. Set its vector type to
96 VECTYPE if it doesn't have one already. */
98 static stmt_vec_info
99 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
100 tree vectype)
102 vec_info *vinfo = orig_stmt_info->vinfo;
103 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
104 if (pattern_stmt_info == NULL)
105 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
106 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
108 pattern_stmt_info->pattern_stmt_p = true;
109 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
110 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
111 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
112 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
113 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
114 return pattern_stmt_info;
117 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
118 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
119 have one already. */
121 static void
122 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
123 tree vectype)
125 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
126 STMT_VINFO_RELATED_STMT (orig_stmt_info)
127 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
130 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
131 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
132 be different from the vector type of the final pattern statement. */
134 static inline void
135 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
136 tree vectype = NULL_TREE)
138 vec_info *vinfo = stmt_info->vinfo;
139 if (vectype)
141 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
142 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
144 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
145 new_stmt);
148 /* The caller wants to perform new operations on vect_external variable
149 VAR, so that the result of the operations would also be vect_external.
150 Return the edge on which the operations can be performed, if one exists.
151 Return null if the operations should instead be treated as part of
152 the pattern that needs them. */
154 static edge
155 vect_get_external_def_edge (vec_info *vinfo, tree var)
157 edge e = NULL;
158 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
160 e = loop_preheader_edge (loop_vinfo->loop);
161 if (!SSA_NAME_IS_DEFAULT_DEF (var))
163 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
164 if (bb == NULL
165 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
166 e = NULL;
169 return e;
172 /* Return true if the target supports a vector version of CODE,
173 where CODE is known to map to a direct optab. ITYPE specifies
174 the type of (some of) the scalar inputs and OTYPE specifies the
175 type of the scalar result.
177 If CODE allows the inputs and outputs to have different types
178 (such as for WIDEN_SUM_EXPR), it is the input mode rather
179 than the output mode that determines the appropriate target pattern.
180 Operand 0 of the target pattern then specifies the mode that the output
181 must have.
183 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
184 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
185 is nonnull. */
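/* For example (illustrative, not part of the original comment): the
   dot-product recognizer below calls this function with CODE ==
   DOT_PROD_EXPR, ITYPE being the narrow element type of the multiplication
   inputs and OTYPE the wider accumulator type, so the optab lookup is
   keyed off the mode of the narrow input vector.  */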
187 static bool
188 vect_supportable_direct_optab_p (tree otype, tree_code code,
189 tree itype, tree *vecotype_out,
190 tree *vecitype_out = NULL)
192 tree vecitype = get_vectype_for_scalar_type (itype);
193 if (!vecitype)
194 return false;
196 tree vecotype = get_vectype_for_scalar_type (otype);
197 if (!vecotype)
198 return false;
200 optab optab = optab_for_tree_code (code, vecitype, optab_default);
201 if (!optab)
202 return false;
204 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
205 if (icode == CODE_FOR_nothing
206 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
207 return false;
209 *vecotype_out = vecotype;
210 if (vecitype_out)
211 *vecitype_out = vecitype;
212 return true;
215 /* Round bit precision PRECISION up to a full element. */
217 static unsigned int
218 vect_element_precision (unsigned int precision)
220 precision = 1 << ceil_log2 (precision);
221 return MAX (precision, BITS_PER_UNIT);
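/* For example (illustrative): a precision of 12 rounds up to 16, and a
   precision of 3 rounds up to BITS_PER_UNIT (8 on most targets), so the
   result always describes a whole vector element.  */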
224 /* If OP is defined by a statement that's being considered for vectorization,
225 return information about that statement, otherwise return NULL. */
227 static stmt_vec_info
228 vect_get_internal_def (vec_info *vinfo, tree op)
230 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
231 if (def_stmt_info
232 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
233 return def_stmt_info;
234 return NULL;
237 /* Check whether NAME, an ssa-name used in STMT_VINFO,
238 is a result of a type promotion, such that:
239 DEF_STMT: NAME = NOP (name0)
240 If CHECK_SIGN is TRUE, check that either both types are signed or both are
241 unsigned. */
243 static bool
244 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
245 tree *orig_type, gimple **def_stmt, bool *promotion)
247 tree type = TREE_TYPE (name);
248 tree oprnd0;
249 enum vect_def_type dt;
251 stmt_vec_info def_stmt_info;
252 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
253 def_stmt))
254 return false;
256 if (dt != vect_internal_def
257 && dt != vect_external_def && dt != vect_constant_def)
258 return false;
260 if (!*def_stmt)
261 return false;
263 if (!is_gimple_assign (*def_stmt))
264 return false;
266 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
267 return false;
269 oprnd0 = gimple_assign_rhs1 (*def_stmt);
271 *orig_type = TREE_TYPE (oprnd0);
272 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
273 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
274 return false;
276 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
277 *promotion = true;
278 else
279 *promotion = false;
281 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
282 return false;
284 return true;
287 /* Holds information about an input operand after some sign changes
288 and type promotions have been peeled away. */
289 struct vect_unpromoted_value {
290 vect_unpromoted_value ();
292 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
294 /* The value obtained after peeling away zero or more casts. */
295 tree op;
297 /* The type of OP. */
298 tree type;
300 /* The definition type of OP. */
301 vect_def_type dt;
303 /* If OP is the result of peeling at least one cast, and if the cast
304 of OP itself is a vectorizable statement, CASTER identifies that
305 statement, otherwise it is null. */
306 stmt_vec_info caster;
309 inline vect_unpromoted_value::vect_unpromoted_value ()
310 : op (NULL_TREE),
311 type (NULL_TREE),
312 dt (vect_uninitialized_def),
313 caster (NULL)
317 /* Set the operand to OP_IN, its definition type to DT_IN, and the
318 statement that casts it to CASTER_IN. */
320 inline void
321 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
322 stmt_vec_info caster_in)
324 op = op_in;
325 type = TREE_TYPE (op);
326 dt = dt_in;
327 caster = caster_in;
330 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
331 to reach some vectorizable inner operand OP', continuing as long as it
332 is possible to convert OP' back to OP using a possible sign change
333 followed by a possible promotion P. Return this OP', or null if OP is
334 not a vectorizable SSA name. If there is a promotion P, describe its
335 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
336 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
337 have more than one user.
339 A successful return means that it is possible to go from OP' to OP
340 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
341 whereas the cast from UNPROM to OP might be a promotion, a sign
342 change, or a nop.
344 E.g. say we have:
346 signed short *ptr = ...;
347 signed short C = *ptr;
348 unsigned short B = (unsigned short) C; // sign change
349 signed int A = (signed int) B; // unsigned promotion
350 ...possible other uses of A...
351 unsigned int OP = (unsigned int) A; // sign change
353 In this case it's possible to go directly from C to OP using:
355 OP = (unsigned int) (unsigned short) C;
356      +------------+ +--------------+
357        promotion      sign change
359 so OP' would be C. The input to the promotion is B, so UNPROM
360 would describe B. */
362 static tree
363 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
364 vect_unpromoted_value *unprom,
365 bool *single_use_p = NULL)
367 tree res = NULL_TREE;
368 tree op_type = TREE_TYPE (op);
369 unsigned int orig_precision = TYPE_PRECISION (op_type);
370 stmt_vec_info caster = NULL;
371 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
373 /* See whether OP is simple enough to vectorize. */
374 stmt_vec_info def_stmt_info;
375 gimple *def_stmt;
376 vect_def_type dt;
377 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
378 break;
380 /* If OP is the input of a demotion, skip over it to see whether
381 OP is itself the result of a promotion. If so, the combined
382 effect of the promotion and the demotion might fit the required
383 pattern, otherwise neither operation fits.
385 This copes with cases such as the result of an arithmetic
386 operation being truncated before being stored, and where that
387 arithmetic operation has been recognized as an over-widened one. */
388 if (TYPE_PRECISION (op_type) <= orig_precision)
390 /* Use OP as the UNPROM described above if we haven't yet
391 found a promotion, or if using the new input preserves the
392 sign of the previous promotion. */
393 if (!res
394 || TYPE_PRECISION (unprom->type) == orig_precision
395 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
396 unprom->set_op (op, dt, caster);
397 /* Stop if we've already seen a promotion and if this
398 conversion does more than change the sign. */
399 else if (TYPE_PRECISION (op_type)
400 != TYPE_PRECISION (unprom->type))
401 break;
403 /* The sequence now extends to OP. */
404 res = op;
407 /* See whether OP is defined by a cast. Record it as CASTER if
408 the cast is potentially vectorizable. */
409 if (!def_stmt)
410 break;
411 caster = def_stmt_info;
413 /* Ignore pattern statements, since we don't link uses for them. */
414 if (caster
415 && single_use_p
416 && !STMT_VINFO_RELATED_STMT (caster)
417 && !has_single_use (res))
418 *single_use_p = false;
420 gassign *assign = dyn_cast <gassign *> (def_stmt);
421 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
422 break;
424 /* Continue with the input to the cast. */
425 op = gimple_assign_rhs1 (def_stmt);
426 op_type = TREE_TYPE (op);
428 return res;
431 /* OP is an integer operand to an operation that returns TYPE, and we
432 want to treat the operation as a widening one. So far we can treat
433 it as widening from *COMMON_TYPE.
435 Return true if OP is suitable for such a widening operation,
436 either widening from *COMMON_TYPE or from some supertype of it.
437 Update *COMMON_TYPE to the supertype in the latter case.
439 SHIFT_P is true if OP is a shift amount. */
441 static bool
442 vect_joust_widened_integer (tree type, bool shift_p, tree op,
443 tree *common_type)
445 /* Calculate the minimum precision required by OP, without changing
446 the sign of either operand. */
447 unsigned int precision;
448 if (shift_p)
450 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
451 return false;
452 precision = TREE_INT_CST_LOW (op);
454 else
456 precision = wi::min_precision (wi::to_widest (op),
457 TYPE_SIGN (*common_type));
458 if (precision * 2 > TYPE_PRECISION (type))
459 return false;
462 /* If OP requires a wider type, switch to that type. The checks
463 above ensure that this is still narrower than the result. */
464 precision = vect_element_precision (precision);
465 if (TYPE_PRECISION (*common_type) < precision)
466 *common_type = build_nonstandard_integer_type
467 (precision, TYPE_UNSIGNED (*common_type));
468 return true;
471 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
472 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
474 static bool
475 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
477 if (types_compatible_p (*common_type, new_type))
478 return true;
480 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
481 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
482 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
483 return true;
485 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
486 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
487 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
489 *common_type = new_type;
490 return true;
493 /* We have mismatched signs, with the signed type being
494 no wider than the unsigned type. In this case we need
495 a wider signed type. */
496 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
497 TYPE_PRECISION (new_type));
498 precision *= 2;
499 if (precision * 2 > TYPE_PRECISION (type))
500 return false;
502 *common_type = build_nonstandard_integer_type (precision, false);
503 return true;
506 /* Check whether STMT_INFO can be viewed as a tree of integer operations
507 in which each node either performs CODE or WIDENED_CODE, and where
508 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
509 specifies the maximum number of leaf operands. SHIFT_P says whether
510 CODE and WIDENED_CODE are some sort of shift.
512 If STMT_INFO is such a tree, return the number of leaf operands
513 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
514 to a type that (a) is narrower than the result of STMT_INFO and
515 (b) can hold all leaf operand values.
517 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
518 exists. */
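/* Illustrative example (an assumption, not from the original comment):
   given signed char values a, b and c, the statement

     int res = (int) a + (int) b + (int) c;

   called with CODE == WIDENED_CODE == PLUS_EXPR and MAX_NOPS == 3 would
   return 3, describe a, b and c in UNPROM[0..2] and set *COMMON_TYPE to
   signed char, since all three leaves are promoted from that type.  */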
520 static unsigned int
521 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
522 tree_code widened_code, bool shift_p,
523 unsigned int max_nops,
524 vect_unpromoted_value *unprom, tree *common_type)
526 /* Check for an integer operation with the right code. */
527 vec_info *vinfo = stmt_info->vinfo;
528 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
529 if (!assign)
530 return 0;
532 tree_code rhs_code = gimple_assign_rhs_code (assign);
533 if (rhs_code != code && rhs_code != widened_code)
534 return 0;
536 tree type = gimple_expr_type (assign);
537 if (!INTEGRAL_TYPE_P (type))
538 return 0;
540 /* Assume that both operands will be leaf operands. */
541 max_nops -= 2;
543 /* Check the operands. */
544 unsigned int next_op = 0;
545 for (unsigned int i = 0; i < 2; ++i)
547 vect_unpromoted_value *this_unprom = &unprom[next_op];
548 unsigned int nops = 1;
549 tree op = gimple_op (assign, i + 1);
550 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
552 /* We already have a common type from earlier operands.
553 Update it to account for OP. */
554 this_unprom->set_op (op, vect_constant_def);
555 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
556 return 0;
558 else
560 /* Only allow shifts by constants. */
561 if (shift_p && i == 1)
562 return 0;
564 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
565 this_unprom))
566 return 0;
568 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
570 /* The operand isn't widened. If STMT_INFO has the code
571 for an unwidened operation, recursively check whether
572 this operand is a node of the tree. */
573 if (rhs_code != code
574 || max_nops == 0
575 || this_unprom->dt != vect_internal_def)
576 return 0;
578 /* Give back the leaf slot allocated above now that we're
579 not treating this as a leaf operand. */
580 max_nops += 1;
582 /* Recursively process the definition of the operand. */
583 stmt_vec_info def_stmt_info
584 = vinfo->lookup_def (this_unprom->op);
585 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
586 shift_p, max_nops, this_unprom,
587 common_type);
588 if (nops == 0)
589 return 0;
591 max_nops -= nops;
593 else
595 /* Make sure that the operand is narrower than the result. */
596 if (TYPE_PRECISION (this_unprom->type) * 2
597 > TYPE_PRECISION (type))
598 return 0;
600 /* Update COMMON_TYPE for the new operand. */
601 if (i == 0)
602 *common_type = this_unprom->type;
603 else if (!vect_joust_widened_type (type, this_unprom->type,
604 common_type))
605 return 0;
608 next_op += nops;
610 return next_op;
613 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
614 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
616 static tree
617 vect_recog_temp_ssa_var (tree type, gimple *stmt)
619 return make_temp_ssa_name (type, stmt, "patt");
622 /* STMT2_INFO describes a type conversion that could be split into STMT1
623 followed by a version of STMT2_INFO that takes NEW_RHS as its first
624 input. Try to do this using pattern statements, returning true on
625 success. */
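/* Illustrative example (an assumption): if STMT2_INFO is the conversion

     int x = (int) c;         // c is a signed char

   and STMT1 computes a signed short mid-point t = (short) c, then the
   split produces

     short t = (short) c;     // STMT1
     int   x = (int) t;       // rewritten STMT2_INFO

   with NEW_RHS being t.  */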
627 static bool
628 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
629 gimple *stmt1, tree vectype)
631 if (is_pattern_stmt_p (stmt2_info))
633 /* STMT2_INFO is part of a pattern. Get the statement to which
634 the pattern is attached. */
635 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
636 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE, vect_location,
640 "Splitting pattern statement: %G", stmt2_info->stmt);
642 /* Since STMT2_INFO is a pattern statement, we can change it
643 in-situ without worrying about changing the code for the
644 containing block. */
645 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
647 if (dump_enabled_p ())
649 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
650 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
651 stmt2_info->stmt);
654 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
655 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
656 /* STMT2_INFO is the actual pattern statement. Add STMT1
657 to the end of the definition sequence. */
658 gimple_seq_add_stmt_without_update (def_seq, stmt1);
659 else
661 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
662 before it. */
663 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
664 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
666 return true;
668 else
670 /* STMT2_INFO doesn't yet have a pattern. Try to create a
671 two-statement pattern now. */
672 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
673 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
674 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
675 if (!lhs_vectype)
676 return false;
678 if (dump_enabled_p ())
679 dump_printf_loc (MSG_NOTE, vect_location,
680 "Splitting statement: %G", stmt2_info->stmt);
682 /* Add STMT1 as a singleton pattern definition sequence. */
683 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
684 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
685 gimple_seq_add_stmt_without_update (def_seq, stmt1);
687 /* Build the second of the two pattern statements. */
688 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
689 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
690 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
692 if (dump_enabled_p ())
694 dump_printf_loc (MSG_NOTE, vect_location,
695 "into pattern statements: %G", stmt1);
696 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
699 return true;
703 /* Convert UNPROM to TYPE and return the result, adding new statements
704 to STMT_INFO's pattern definition statements if no better way is
705 available. VECTYPE is the vector form of TYPE. */
707 static tree
708 vect_convert_input (stmt_vec_info stmt_info, tree type,
709 vect_unpromoted_value *unprom, tree vectype)
711 /* Check for a no-op conversion. */
712 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
713 return unprom->op;
715 /* Allow the caller to create constant vect_unpromoted_values. */
716 if (TREE_CODE (unprom->op) == INTEGER_CST)
717 return wide_int_to_tree (type, wi::to_widest (unprom->op));
719 /* See if we can reuse an existing result. */
720 if (unprom->caster)
722 tree lhs = gimple_get_lhs (unprom->caster->stmt);
723 if (types_compatible_p (TREE_TYPE (lhs), type))
724 return lhs;
727 /* We need a new conversion statement. */
728 tree new_op = vect_recog_temp_ssa_var (type, NULL);
729 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
731 /* If the operation is the input to a vectorizable cast, try splitting
732 that cast into two, taking the required result as a mid-way point. */
733 if (unprom->caster)
735 tree lhs = gimple_get_lhs (unprom->caster->stmt);
736 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
737 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
738 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
739 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
740 return new_op;
743 /* If OP is an external value, see if we can insert the new statement
744 on an incoming edge. */
745 if (unprom->dt == vect_external_def)
746 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
748 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
749 gcc_assert (!new_bb);
750 return new_op;
753 /* As a (common) last resort, add the statement to the pattern itself. */
754 append_pattern_def_seq (stmt_info, new_stmt, vectype);
755 return new_op;
758 /* Invoke vect_convert_input for N elements of UNPROM and store the
759 result in the corresponding elements of RESULT. */
761 static void
762 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
763 tree *result, tree type, vect_unpromoted_value *unprom,
764 tree vectype)
766 for (unsigned int i = 0; i < n; ++i)
768 unsigned int j;
769 for (j = 0; j < i; ++j)
770 if (unprom[j].op == unprom[i].op)
771 break;
772 if (j < i)
773 result[i] = result[j];
774 else
775 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
779 /* The caller has created a (possibly empty) sequence of pattern definition
780 statements followed by a single statement PATTERN_STMT. Cast the result
781 of this final statement to TYPE. If a new statement is needed, add
782 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
783 and return the new statement, otherwise return PATTERN_STMT as-is.
784 VECITYPE is the vector form of PATTERN_STMT's result type. */
786 static gimple *
787 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
788 tree vecitype)
790 tree lhs = gimple_get_lhs (pattern_stmt);
791 if (!types_compatible_p (type, TREE_TYPE (lhs)))
793 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
794 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
795 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
797 return pattern_stmt;
800 /* Return true if STMT_VINFO describes a reduction for which reassociation
801 is allowed. If STMT_INFO is part of a group, assume that it's part of
802 a reduction chain and optimistically assume that all statements
803 except the last allow reassociation. */
805 static bool
806 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
808 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
809 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
810 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
813 /* As above, but also require it to have code CODE and to be a reduction
814 in the outermost loop. When returning true, store the operands in
815 *OP0_OUT and *OP1_OUT. */
817 static bool
818 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
819 tree *op0_out, tree *op1_out)
821 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
822 if (!loop_info)
823 return false;
825 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
826 if (!assign || gimple_assign_rhs_code (assign) != code)
827 return false;
829 /* We don't allow changing the order of the computation in the inner-loop
830 when doing outer-loop vectorization. */
831 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
832 if (loop && nested_in_vect_loop_p (loop, stmt_info))
833 return false;
835 if (!vect_reassociating_reduction_p (stmt_info))
836 return false;
838 *op0_out = gimple_assign_rhs1 (assign);
839 *op1_out = gimple_assign_rhs2 (assign);
840 return true;
843 /* Function vect_recog_dot_prod_pattern
845 Try to find the following pattern:
847 type x_t, y_t;
848 TYPE1 prod;
849 TYPE2 sum = init;
850 loop:
851 sum_0 = phi <init, sum_1>
852 S1 x_t = ...
853 S2 y_t = ...
854 S3 x_T = (TYPE1) x_t;
855 S4 y_T = (TYPE1) y_t;
856 S5 prod = x_T * y_T;
857 [S6 prod = (TYPE2) prod; #optional]
858 S7 sum_1 = prod + sum_0;
860 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
861 same size as 'TYPE1' or bigger. This is a special case of a reduction
862 computation.
864 Input:
866 * STMT_VINFO: The stmt from which the pattern search begins. In the
867 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
868 will be detected.
870 Output:
872 * TYPE_OUT: The type of the output of this pattern.
874 * Return value: A new stmt that will be used to replace the sequence of
875 stmts that constitute the pattern. In this case it will be:
876 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
878 Note: The dot-prod idiom is a widening reduction pattern that is
879 vectorized without preserving all the intermediate results. It
880 produces only N/2 (widened) results (by summing up pairs of
881 intermediate results) rather than all N results. Therefore, we
882 cannot allow this pattern when we want to get all the results and in
883 the correct order (as is the case when this computation is in an
884 inner-loop nested in an outer-loop that is being vectorized). */
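/* Illustrative source-level example (an assumption, not part of the
   original comment): on a target with a dot-product instruction, a loop
   such as

     short *a, *b;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += (int) a[i] * (int) b[i];

   matches this pattern; the widening multiply and accumulate become
   DOT_PROD_EXPR <a[i], b[i], sum>.  */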
886 static gimple *
887 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
889 tree oprnd0, oprnd1;
890 gimple *last_stmt = stmt_vinfo->stmt;
891 vec_info *vinfo = stmt_vinfo->vinfo;
892 tree type, half_type;
893 gimple *pattern_stmt;
894 tree var;
896 /* Look for the following pattern
897 DX = (TYPE1) X;
898 DY = (TYPE1) Y;
899 DPROD = DX * DY;
900 DDPROD = (TYPE2) DPROD;
901 sum_1 = DDPROD + sum_0;
902 In which
903 - DX is double the size of X
904 - DY is double the size of Y
905 - DX, DY, DPROD all have the same type
906 - sum is the same size as DPROD or bigger
907 - sum has been recognized as a reduction variable.
909 This is equivalent to:
910 DPROD = X w* Y; #widen mult
911 sum_1 = DPROD w+ sum_0; #widen summation
912 or
913 DPROD = X w* Y; #widen mult
914 sum_1 = DPROD + sum_0; #summation
917 /* Starting from LAST_STMT, follow the defs of its uses in search
918 of the above pattern. */
920 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
921 &oprnd0, &oprnd1))
922 return NULL;
924 type = gimple_expr_type (last_stmt);
926 vect_unpromoted_value unprom_mult;
927 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
929 /* So far so good. Since last_stmt was detected as a (summation) reduction,
930 we know that oprnd1 is the reduction variable (defined by a loop-header
931 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
932 Left to check that oprnd0 is defined by a (widen_)mult_expr */
933 if (!oprnd0)
934 return NULL;
936 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
937 if (!mult_vinfo)
938 return NULL;
940 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
941 inside the loop (in case we are analyzing an outer-loop). */
942 vect_unpromoted_value unprom0[2];
943 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
944 false, 2, unprom0, &half_type))
945 return NULL;
947 /* If there are two widening operations, make sure they agree on
948 the sign of the extension. */
949 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
950 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
951 return NULL;
953 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
955 tree half_vectype;
956 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
957 type_out, &half_vectype))
958 return NULL;
960 /* Get the inputs in the appropriate types. */
961 tree mult_oprnd[2];
962 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
963 unprom0, half_vectype);
965 var = vect_recog_temp_ssa_var (type, NULL);
966 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
967 mult_oprnd[0], mult_oprnd[1], oprnd1);
969 return pattern_stmt;
973 /* Function vect_recog_sad_pattern
975 Try to find the following Sum of Absolute Difference (SAD) pattern:
977 type x_t, y_t;
978 signed TYPE1 diff, abs_diff;
979 TYPE2 sum = init;
980 loop:
981 sum_0 = phi <init, sum_1>
982 S1 x_t = ...
983 S2 y_t = ...
984 S3 x_T = (TYPE1) x_t;
985 S4 y_T = (TYPE1) y_t;
986 S5 diff = x_T - y_T;
987 S6 abs_diff = ABS_EXPR <diff>;
988 [S7 abs_diff = (TYPE2) abs_diff; #optional]
989 S8 sum_1 = abs_diff + sum_0;
991 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
992 same size as 'TYPE1' or bigger. This is a special case of a reduction
993 computation.
995 Input:
997 * STMT_VINFO: The stmt from which the pattern search begins. In the
998 example, when this function is called with S8, the pattern
999 {S3,S4,S5,S6,S7,S8} will be detected.
1001 Output:
1003 * TYPE_OUT: The type of the output of this pattern.
1005 * Return value: A new stmt that will be used to replace the sequence of
1006 stmts that constitute the pattern. In this case it will be:
1007 SAD_EXPR <x_t, y_t, sum_0>
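/* Illustrative source-level example (an assumption): a sum-of-absolute-
   differences loop such as

     unsigned char *a, *b;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += abs ((int) a[i] - (int) b[i]);

   matches this pattern and becomes SAD_EXPR <a[i], b[i], sum> on targets
   that provide such an instruction (e.g. psadbw on x86).  */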
1010 static gimple *
1011 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1013 gimple *last_stmt = stmt_vinfo->stmt;
1014 vec_info *vinfo = stmt_vinfo->vinfo;
1015 tree half_type;
1017 /* Look for the following pattern
1018 DX = (TYPE1) X;
1019 DY = (TYPE1) Y;
1020 DDIFF = DX - DY;
1021 DAD = ABS_EXPR <DDIFF>;
1022 DAD = (TYPE2) DAD;
1023 sum_1 = DAD + sum_0;
1024 In which
1025 - DX is at least double the size of X
1026 - DY is at least double the size of Y
1027 - DX, DY, DDIFF, DAD all have the same type
1028 - sum is the same size as DAD or bigger
1029 - sum has been recognized as a reduction variable.
1031 This is equivalent to:
1032 DDIFF = X w- Y; #widen sub
1033 DAD = ABS_EXPR <DDIFF>;
1034 sum_1 = DAD w+ sum_0; #widen summation
1035 or
1036 DDIFF = X w- Y; #widen sub
1037 DAD = ABS_EXPR <DDIFF>;
1038 sum_1 = DAD + sum_0; #summation
1041 /* Starting from LAST_STMT, follow the defs of its uses in search
1042 of the above pattern. */
1044 tree plus_oprnd0, plus_oprnd1;
1045 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1046 &plus_oprnd0, &plus_oprnd1))
1047 return NULL;
1049 tree sum_type = gimple_expr_type (last_stmt);
1051 /* Any non-truncating sequence of conversions is OK here, since
1052 with a successful match, the result of the ABS(U) is known to fit
1053 within the nonnegative range of the result type. (It cannot be the
1054 negative of the minimum signed value due to the range of the widening
1055 MINUS_EXPR.) */
1056 vect_unpromoted_value unprom_abs;
1057 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1058 &unprom_abs);
1060 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1061 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1062 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1063 Then check that plus_oprnd0 is defined by an abs_expr. */
1065 if (!plus_oprnd0)
1066 return NULL;
1068 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1069 if (!abs_stmt_vinfo)
1070 return NULL;
1072 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
1073 inside the loop (in case we are analyzing an outer-loop). */
1074 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1075 if (!abs_stmt
1076 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1077 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1078 return NULL;
1080 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1081 tree abs_type = TREE_TYPE (abs_oprnd);
1082 if (TYPE_UNSIGNED (abs_type))
1083 return NULL;
1085 /* Peel off conversions from the ABS input. This can involve sign
1086 changes (e.g. from an unsigned subtraction to a signed ABS input)
1087 or signed promotion, but it can't include unsigned promotion.
1088 (Note that ABS of an unsigned promotion should have been folded
1089 away before now anyway.) */
1090 vect_unpromoted_value unprom_diff;
1091 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1092 &unprom_diff);
1093 if (!abs_oprnd)
1094 return NULL;
1095 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1096 && TYPE_UNSIGNED (unprom_diff.type))
1097 return NULL;
1099 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1100 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1101 if (!diff_stmt_vinfo)
1102 return NULL;
1104 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
1105 inside the loop (in case we are analyzing an outer-loop). */
1106 vect_unpromoted_value unprom[2];
1107 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1108 false, 2, unprom, &half_type))
1109 return NULL;
1111 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1113 tree half_vectype;
1114 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1115 type_out, &half_vectype))
1116 return NULL;
1118 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1119 tree sad_oprnd[2];
1120 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1121 unprom, half_vectype);
1123 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1124 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1125 sad_oprnd[1], plus_oprnd1);
1127 return pattern_stmt;
1130 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1131 so that it can be treated as though it had the form:
1133 A_TYPE a;
1134 B_TYPE b;
1135 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1136 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1137 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1138 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1139 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1141 Try to replace the pattern with:
1143 A_TYPE a;
1144 B_TYPE b;
1145 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1146 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1147 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1148 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1150 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1152 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1153 name of the pattern being matched, for dump purposes. */
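/* Illustrative example (an assumption): with ORIG_CODE == MULT_EXPR and
   WIDE_CODE == WIDEN_MULT_EXPR, a statement such as

     short a, b;
     int res = (int) a * (int) b;

   is rewritten as a widening multiply "res = a w* b" when the target
   supports it for the relevant vector modes.  */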
1155 static gimple *
1156 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1157 tree_code orig_code, tree_code wide_code,
1158 bool shift_p, const char *name)
1160 gimple *last_stmt = last_stmt_info->stmt;
1162 vect_unpromoted_value unprom[2];
1163 tree half_type;
1164 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1165 shift_p, 2, unprom, &half_type))
1166 return NULL;
1168 /* Pattern detected. */
1169 vect_pattern_detected (name, last_stmt);
1171 tree type = gimple_expr_type (last_stmt);
1172 tree itype = type;
1173 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1174 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1175 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1176 TYPE_UNSIGNED (half_type));
1178 /* Check target support */
1179 tree vectype = get_vectype_for_scalar_type (half_type);
1180 tree vecitype = get_vectype_for_scalar_type (itype);
1181 enum tree_code dummy_code;
1182 int dummy_int;
1183 auto_vec<tree> dummy_vec;
1184 if (!vectype
1185 || !vecitype
1186 || !supportable_widening_operation (wide_code, last_stmt_info,
1187 vecitype, vectype,
1188 &dummy_code, &dummy_code,
1189 &dummy_int, &dummy_vec))
1190 return NULL;
1192 *type_out = get_vectype_for_scalar_type (type);
1193 if (!*type_out)
1194 return NULL;
1196 tree oprnd[2];
1197 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1199 tree var = vect_recog_temp_ssa_var (itype, NULL);
1200 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1201 oprnd[0], oprnd[1]);
1203 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1206 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1207 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1209 static gimple *
1210 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1212 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1213 WIDEN_MULT_EXPR, false,
1214 "vect_recog_widen_mult_pattern");
1217 /* Function vect_recog_pow_pattern
1219 Try to find the following pattern:
1221 x = POW (y, N);
1223 with POW being one of pow, powf, powi, powif and N being
1224 either 2 or 0.5.
1226 Input:
1228 * STMT_VINFO: The stmt from which the pattern search begins.
1230 Output:
1232 * TYPE_OUT: The type of the output of this pattern.
1234 * Return value: A new stmt that will be used to replace the sequence of
1235 stmts that constitute the pattern. In this case it will be:
1236 x = x * x
1237 or
1238 x = sqrt (x)
1241 static gimple *
1242 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1244 gimple *last_stmt = stmt_vinfo->stmt;
1245 tree base, exp;
1246 gimple *stmt;
1247 tree var;
1249 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1250 return NULL;
1252 switch (gimple_call_combined_fn (last_stmt))
1254 CASE_CFN_POW:
1255 CASE_CFN_POWI:
1256 break;
1258 default:
1259 return NULL;
1262 base = gimple_call_arg (last_stmt, 0);
1263 exp = gimple_call_arg (last_stmt, 1);
1264 if (TREE_CODE (exp) != REAL_CST
1265 && TREE_CODE (exp) != INTEGER_CST)
1267 if (flag_unsafe_math_optimizations
1268 && TREE_CODE (base) == REAL_CST
1269 && !gimple_call_internal_p (last_stmt))
1271 combined_fn log_cfn;
1272 built_in_function exp_bfn;
1273 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1275 case BUILT_IN_POW:
1276 log_cfn = CFN_BUILT_IN_LOG;
1277 exp_bfn = BUILT_IN_EXP;
1278 break;
1279 case BUILT_IN_POWF:
1280 log_cfn = CFN_BUILT_IN_LOGF;
1281 exp_bfn = BUILT_IN_EXPF;
1282 break;
1283 case BUILT_IN_POWL:
1284 log_cfn = CFN_BUILT_IN_LOGL;
1285 exp_bfn = BUILT_IN_EXPL;
1286 break;
1287 default:
1288 return NULL;
1290 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1291 tree exp_decl = builtin_decl_implicit (exp_bfn);
1292 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1293 does that, but if C is a power of 2, we want to use
1294 exp2 (log2 (C) * x) in the non-vectorized version, but for
1295 vectorization we don't have vectorized exp2. */
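/* For instance (illustrative): "pow (2.5, x)" can be vectorized as
   "exp (0.9163... * x)" by multiplying x by log (2.5) and calling the
   SIMD clone of exp.  */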
1296 if (logc
1297 && TREE_CODE (logc) == REAL_CST
1298 && exp_decl
1299 && lookup_attribute ("omp declare simd",
1300 DECL_ATTRIBUTES (exp_decl)))
1302 cgraph_node *node = cgraph_node::get_create (exp_decl);
1303 if (node->simd_clones == NULL)
1305 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1306 || node->definition)
1307 return NULL;
1308 expand_simd_clones (node);
1309 if (node->simd_clones == NULL)
1310 return NULL;
1312 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1313 if (!*type_out)
1314 return NULL;
1315 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1316 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1317 append_pattern_def_seq (stmt_vinfo, g);
1318 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1319 g = gimple_build_call (exp_decl, 1, def);
1320 gimple_call_set_lhs (g, res);
1321 return g;
1325 return NULL;
1328 /* We now have a pow or powi builtin function call with a constant
1329 exponent. */
1331 /* Catch squaring. */
1332 if ((tree_fits_shwi_p (exp)
1333 && tree_to_shwi (exp) == 2)
1334 || (TREE_CODE (exp) == REAL_CST
1335 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1337 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1338 TREE_TYPE (base), type_out))
1339 return NULL;
1341 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1342 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1343 return stmt;
1346 /* Catch square root. */
1347 if (TREE_CODE (exp) == REAL_CST
1348 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1350 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1351 if (*type_out
1352 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1353 OPTIMIZE_FOR_SPEED))
1355 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1356 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1357 gimple_call_set_lhs (stmt, var);
1358 gimple_call_set_nothrow (stmt, true);
1359 return stmt;
1363 return NULL;
1367 /* Function vect_recog_widen_sum_pattern
1369 Try to find the following pattern:
1371 type x_t;
1372 TYPE x_T, sum = init;
1373 loop:
1374 sum_0 = phi <init, sum_1>
1375 S1 x_t = *p;
1376 S2 x_T = (TYPE) x_t;
1377 S3 sum_1 = x_T + sum_0;
1379 where type 'TYPE' is at least double the size of type 'type', i.e., we're
1380 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1381 a special case of a reduction computation.
1383 Input:
1385 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1386 when this function is called with S3, the pattern {S2,S3} will be detected.
1388 Output:
1390 * TYPE_OUT: The type of the output of this pattern.
1392 * Return value: A new stmt that will be used to replace the sequence of
1393 stmts that constitute the pattern. In this case it will be:
1394 WIDEN_SUM <x_t, sum_0>
1396 Note: The widening-sum idiom is a widening reduction pattern that is
1397 vectorized without preserving all the intermediate results. It
1398 produces only N/2 (widened) results (by summing up pairs of
1399 intermediate results) rather than all N results. Therefore, we
1400 cannot allow this pattern when we want to get all the results and in
1401 the correct order (as is the case when this computation is in an
1402 inner-loop nested in an outer-loop that is being vectorized). */
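/* Illustrative source-level example (an assumption): a reduction such as

     short *a;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += a[i];

   matches this pattern and becomes WIDEN_SUM_EXPR <a[i], sum> where the
   target provides a widening-sum instruction.  */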
1404 static gimple *
1405 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1407 gimple *last_stmt = stmt_vinfo->stmt;
1408 tree oprnd0, oprnd1;
1409 vec_info *vinfo = stmt_vinfo->vinfo;
1410 tree type;
1411 gimple *pattern_stmt;
1412 tree var;
1414 /* Look for the following pattern
1415 DX = (TYPE) X;
1416 sum_1 = DX + sum_0;
1417 In which DX is at least double the size of X, and sum_1 has been
1418 recognized as a reduction variable.
1421 /* Starting from LAST_STMT, follow the defs of its uses in search
1422 of the above pattern. */
1424 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1425 &oprnd0, &oprnd1))
1426 return NULL;
1428 type = gimple_expr_type (last_stmt);
1430 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1431 we know that oprnd1 is the reduction variable (defined by a loop-header
1432 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1433 Left to check that oprnd0 is defined by a cast from type 'type' to type
1434 'TYPE'. */
1436 vect_unpromoted_value unprom0;
1437 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1438 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1439 return NULL;
1441 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1443 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1444 type_out))
1445 return NULL;
1447 var = vect_recog_temp_ssa_var (type, NULL);
1448 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1450 return pattern_stmt;
1453 /* Recognize cases in which an operation is performed in one type WTYPE
1454 but could be done more efficiently in a narrower type NTYPE. For example,
1455 if we have:
1457 ATYPE a; // narrower than NTYPE
1458 BTYPE b; // narrower than NTYPE
1459 WTYPE aw = (WTYPE) a;
1460 WTYPE bw = (WTYPE) b;
1461 WTYPE res = aw + bw; // only uses of aw and bw
1463 then it would be more efficient to do:
1465 NTYPE an = (NTYPE) a;
1466 NTYPE bn = (NTYPE) b;
1467 NTYPE resn = an + bn;
1468 WTYPE res = (WTYPE) resn;
1470 Other situations include things like:
1472 ATYPE a; // NTYPE or narrower
1473 WTYPE aw = (WTYPE) a;
1474 WTYPE res = aw + b;
1476 when only "(NTYPE) res" is significant. In that case it's more efficient
1477 to truncate "b" and do the operation on NTYPE instead:
1479 NTYPE an = (NTYPE) a;
1480 NTYPE bn = (NTYPE) b; // truncation
1481 NTYPE resn = an + bn;
1482 WTYPE res = (WTYPE) resn;
1484 All users of "res" should then use "resn" instead, making the final
1485 statement dead (not marked as relevant). The final statement is still
1486 needed to maintain the type correctness of the IR.
1488 vect_determine_precisions has already determined the minimum
1489 precision of the operation and the minimum precision required
1490 by users of the result. */
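/* Illustrative example (an assumption): with unsigned char inputs a and b,

     int tmp = (int) a + (int) b;
     res[i] = (unsigned char) tmp;

   only the low 8 bits of tmp are needed, and the low bits of a sum depend
   only on the low bits of its inputs, so the addition can be done on
   narrower elements instead of unpacking everything to int vectors.  */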
1492 static gimple *
1493 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1495 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1496 if (!last_stmt)
1497 return NULL;
1499 /* See whether we have found that this operation can be done on a
1500 narrower type without changing its semantics. */
1501 unsigned int new_precision = last_stmt_info->operation_precision;
1502 if (!new_precision)
1503 return NULL;
1505 vec_info *vinfo = last_stmt_info->vinfo;
1506 tree lhs = gimple_assign_lhs (last_stmt);
1507 tree type = TREE_TYPE (lhs);
1508 tree_code code = gimple_assign_rhs_code (last_stmt);
1510 /* Keep the first operand of a COND_EXPR as-is: only the other two
1511 operands are interesting. */
1512 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1514 /* Check the operands. */
1515 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1516 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1517 unprom.quick_grow (nops);
1518 unsigned int min_precision = 0;
1519 bool single_use_p = false;
1520 for (unsigned int i = 0; i < nops; ++i)
1522 tree op = gimple_op (last_stmt, first_op + i);
1523 if (TREE_CODE (op) == INTEGER_CST)
1524 unprom[i].set_op (op, vect_constant_def);
1525 else if (TREE_CODE (op) == SSA_NAME)
1527 bool op_single_use_p = true;
1528 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1529 &op_single_use_p))
1530 return NULL;
1531 /* If:
1533 (1) N bits of the result are needed;
1534 (2) all inputs are widened from M<N bits; and
1535 (3) one operand OP is a single-use SSA name
1537 we can shift the M->N widening from OP to the output
1538 without changing the number or type of extensions involved.
1539 This then reduces the number of copies of STMT_INFO.
1541 If instead of (3) more than one operand is a single-use SSA name,
1542 shifting the extension to the output is even more of a win.
1544 If instead:
1546 (1) N bits of the result are needed;
1547 (2) one operand OP2 is widened from M2<N bits;
1548 (3) another operand OP1 is widened from M1<M2 bits; and
1549 (4) both OP1 and OP2 are single-use
1551 the choice is between:
1553 (a) truncating OP2 to M1, doing the operation on M1,
1554 and then widening the result to N
1556 (b) widening OP1 to M2, doing the operation on M2, and then
1557 widening the result to N
1559 Both shift the M2->N widening of the inputs to the output.
1560 (a) additionally shifts the M1->M2 widening to the output;
1561 it requires fewer copies of STMT_INFO but requires an extra
1562 M2->M1 truncation.
1564 Which is better will depend on the complexity and cost of
1565 STMT_INFO, which is hard to predict at this stage. However,
1566 a clear tie-breaker in favor of (b) is the fact that the
1567 truncation in (a) increases the length of the operation chain.
1569 If instead of (4) only one of OP1 or OP2 is single-use,
1570 (b) is still a win over doing the operation in N bits:
1571 it still shifts the M2->N widening on the single-use operand
1572 to the output and reduces the number of STMT_INFO copies.
1574 If neither operand is single-use then operating on fewer than
1575 N bits might lead to more extensions overall. Whether it does
1576 or not depends on global information about the vectorization
1577 region, and whether that's a good trade-off would again
1578 depend on the complexity and cost of the statements involved,
1579 as well as things like register pressure that are not normally
1580 modelled at this stage. We therefore ignore these cases
1581 and just optimize the clear single-use wins above.
1583 Thus we take the maximum precision of the unpromoted operands
1584 and record whether any operand is single-use. */
1585 if (unprom[i].dt == vect_internal_def)
1587 min_precision = MAX (min_precision,
1588 TYPE_PRECISION (unprom[i].type));
1589 single_use_p |= op_single_use_p;
1594 /* Although the operation could be done in operation_precision, we have
1595 to balance that against introducing extra truncations or extensions.
1596 Calculate the minimum precision that can be handled efficiently.
1598 The loop above determined that the operation could be handled
1599 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1600 extension from the inputs to the output without introducing more
1601 instructions, and would reduce the number of instructions required
1602 for STMT_INFO itself.
1604 vect_determine_precisions has also determined that the result only
1605 needs min_output_precision bits. Truncating by a factor of N times
1606 requires a tree of N - 1 instructions, so if TYPE is N times wider
1607 than min_output_precision, doing the operation in TYPE and truncating
1608 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1609 In contrast:
1611 - truncating the input to a unary operation and doing the operation
1612 in the new type requires at most N - 1 + 1 = N instructions per
1613 output vector
1615 - doing the same for a binary operation requires at most
1616 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1618 Both unary and binary operations require fewer instructions than
1619 this if the operands were extended from a suitable truncated form.
1620 Thus there is usually nothing to lose by doing operations in
1621 min_output_precision bits, but there can be something to gain. */
1622 if (!single_use_p)
1623 min_precision = last_stmt_info->min_output_precision;
1624 else
1625 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1627 /* Apply the minimum efficient precision we just calculated. */
1628 if (new_precision < min_precision)
1629 new_precision = min_precision;
1630 if (new_precision >= TYPE_PRECISION (type))
1631 return NULL;
1633 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1635 *type_out = get_vectype_for_scalar_type (type);
1636 if (!*type_out)
1637 return NULL;
1639 /* We've found a viable pattern. Get the new type of the operation. */
1640 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1641 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1643 /* We specifically don't check here whether the target supports the
1644 new operation, since it might be something that a later pattern
1645 wants to rewrite anyway. If targets have a minimum element size
1646 for some optabs, we should pattern-match smaller ops to larger ops
1647 where beneficial. */
1648 tree new_vectype = get_vectype_for_scalar_type (new_type);
1649 if (!new_vectype)
1650 return NULL;
1652 if (dump_enabled_p ())
1653 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1654 type, new_type);
1656 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1657 tree ops[3] = {};
1658 for (unsigned int i = 1; i < first_op; ++i)
1659 ops[i - 1] = gimple_op (last_stmt, i);
1660 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1661 new_type, &unprom[0], new_vectype);
1663 /* Use the operation to produce a result of type NEW_TYPE. */
1664 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1665 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1666 ops[0], ops[1], ops[2]);
1667 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1669 if (dump_enabled_p ())
1670 dump_printf_loc (MSG_NOTE, vect_location,
1671 "created pattern stmt: %G", pattern_stmt);
1673 pattern_stmt = vect_convert_output (last_stmt_info, type,
1674 pattern_stmt, new_vectype);
1676 return pattern_stmt;
1679 /* Recognize the patterns:
1681 ATYPE a; // narrower than TYPE
1682 BTYPE b; // narrower than TYPE
1683 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1684 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1686 where only the bottom half of avg is used. Try to transform them into:
1688 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1689 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1691 followed by:
1693 TYPE avg = (TYPE) avg';
1695 where NTYPE is no wider than half of TYPE. Since only the bottom half
1696 of avg is used, all or part of the cast of avg' should become redundant. */
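/* Illustrative source-level example (an assumption): the rounding average

     unsigned char *a, *b, *c;
     for (int i = 0; i < n; i++)
       c[i] = (a[i] + b[i] + 1) >> 1;

   matches form (2) and is vectorized using IFN_AVG_CEIL (e.g. pavgb on
   x86); dropping the "+ 1" gives form (1) and IFN_AVG_FLOOR.  */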
1698 static gimple *
1699 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1701 /* Check for a shift right by one bit. */
1702 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1703 vec_info *vinfo = last_stmt_info->vinfo;
1704 if (!last_stmt
1705 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1706 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1707 return NULL;
1709 /* Check that the shift result is wider than the users of the
1710 result need (i.e. that narrowing would be a natural choice). */
1711 tree lhs = gimple_assign_lhs (last_stmt);
1712 tree type = TREE_TYPE (lhs);
1713 unsigned int target_precision
1714 = vect_element_precision (last_stmt_info->min_output_precision);
1715 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1716 return NULL;
1718 /* Get the definition of the shift input. */
1719 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1720 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1721 if (!plus_stmt_info)
1722 return NULL;
1724 /* Check whether the shift input can be seen as a tree of additions on
1725 2 or 3 widened inputs.
1727 Note that the pattern should be a win even if the result of one or
1728 more additions is reused elsewhere: if the pattern matches, we'd be
1729 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1730 internal_fn ifn = IFN_AVG_FLOOR;
1731 vect_unpromoted_value unprom[3];
1732 tree new_type;
1733 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1734 PLUS_EXPR, false, 3,
1735 unprom, &new_type);
1736 if (nops == 0)
1737 return NULL;
1738 if (nops == 3)
1740 /* Check that one operand is 1. */
1741 unsigned int i;
1742 for (i = 0; i < 3; ++i)
1743 if (integer_onep (unprom[i].op))
1744 break;
1745 if (i == 3)
1746 return NULL;
1747 /* Throw away the 1 operand and keep the other two. */
1748 if (i < 2)
1749 unprom[i] = unprom[2];
1750 ifn = IFN_AVG_CEIL;
1753 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1755 /* We know that:
1757 (a) the operation can be viewed as:
1759 TYPE widened0 = (TYPE) UNPROM[0];
1760 TYPE widened1 = (TYPE) UNPROM[1];
1761 TYPE tmp1 = widened0 + widened1 {+ 1};
1762 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1764 (b) the first two statements are equivalent to:
1766 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1767 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1769 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1770 where sensible;
1772 (d) all the operations can be performed correctly at twice the width of
1773 NEW_TYPE, due to the nature of the average operation; and
1775 (e) users of the result of the right shift need only TARGET_PRECISION
1776 bits, where TARGET_PRECISION is no more than half of TYPE's
1777 precision.
1779 Under these circumstances, the only situation in which NEW_TYPE
1780 could be narrower than TARGET_PRECISION is if widened0, widened1
1781 and an addition result are all used more than once. Thus we can
1782 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1783 as "free", whereas widening the result of the average instruction
1784 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1785 therefore better not to go narrower than TARGET_PRECISION. */
1786 if (TYPE_PRECISION (new_type) < target_precision)
1787 new_type = build_nonstandard_integer_type (target_precision,
1788 TYPE_UNSIGNED (new_type));
1790 /* Check for target support. */
1791 tree new_vectype = get_vectype_for_scalar_type (new_type);
1792 if (!new_vectype
1793 || !direct_internal_fn_supported_p (ifn, new_vectype,
1794 OPTIMIZE_FOR_SPEED))
1795 return NULL;
1797 /* The IR requires a valid vector type for the cast result, even though
1798 it's likely to be discarded. */
1799 *type_out = get_vectype_for_scalar_type (type);
1800 if (!*type_out)
1801 return NULL;
1803 /* Generate the IFN_AVG* call. */
1804 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1805 tree new_ops[2];
1806 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1807 unprom, new_vectype);
1808 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1809 new_ops[1]);
1810 gimple_call_set_lhs (average_stmt, new_var);
1811 gimple_set_location (average_stmt, gimple_location (last_stmt));
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_NOTE, vect_location,
1815 "created pattern stmt: %G", average_stmt);
1817 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1820 /* Recognize cases in which the input to a cast is wider than its
1821 output, and the input is fed by a widening operation. Fold this
1822 by removing the unnecessary intermediate widening. E.g.:
1824 unsigned char a;
1825 unsigned int b = (unsigned int) a;
1826 unsigned short c = (unsigned short) b;
can be folded to:
1830 unsigned short c = (unsigned short) a;
1832 Although this is rare in input IR, it is an expected side-effect
1833 of the over-widening pattern above.
1835 This is beneficial also for integer-to-float conversions, if the
1836 widened integer has more bits than the float, and if the unwidened
1837 input doesn't. */
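/* For illustration (types chosen as an example), the integer-to-float
   case applies to

     short a;
     long long b = (long long) a;
     float f = (float) b;

   which can be folded to

     float f = (float) a;

   since long long (64 bits) is wider than float's 32-bit mode, short
   is not, and both integer types are signed.  */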
1839 static gimple *
1840 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1842 /* Check for a cast, including an integer-to-float conversion. */
1843 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1844 if (!last_stmt)
1845 return NULL;
1846 tree_code code = gimple_assign_rhs_code (last_stmt);
1847 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1848 return NULL;
1850 /* Make sure that the rhs is a scalar with a natural bitsize. */
1851 tree lhs = gimple_assign_lhs (last_stmt);
1852 if (!lhs)
1853 return NULL;
1854 tree lhs_type = TREE_TYPE (lhs);
1855 scalar_mode lhs_mode;
1856 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1857 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1858 return NULL;
1860 /* Check for a narrowing operation (from a vector point of view). */
1861 tree rhs = gimple_assign_rhs1 (last_stmt);
1862 tree rhs_type = TREE_TYPE (rhs);
1863 if (!INTEGRAL_TYPE_P (rhs_type)
1864 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1865 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1866 return NULL;
1868 /* Try to find an unpromoted input. */
1869 vec_info *vinfo = last_stmt_info->vinfo;
1870 vect_unpromoted_value unprom;
1871 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1872 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1873 return NULL;
1875 /* If the bits above RHS_TYPE matter, make sure that they're the
1876 same when extending from UNPROM as they are when extending from RHS. */
1877 if (!INTEGRAL_TYPE_P (lhs_type)
1878 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1879 return NULL;
1881 /* We can get the same result by casting UNPROM directly, to avoid
1882 the unnecessary widening and narrowing. */
1883 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1885 *type_out = get_vectype_for_scalar_type (lhs_type);
1886 if (!*type_out)
1887 return NULL;
1889 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1890 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1891 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1893 return pattern_stmt;
1896 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1897 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1899 static gimple *
1900 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1902 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1903 WIDEN_LSHIFT_EXPR, true,
1904 "vect_recog_widen_shift_pattern");
1907 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1909 type a_t, b_t, c_t;
1911 S0 a_t = b_t r<< c_t;
1913 Input/Output:
1915 * STMT_VINFO: The stmt from which the pattern search begins,
1916 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1917 with a sequence:
1919 S1 d_t = -c_t;
1920 S2 e_t = d_t & (B - 1);
1921 S3 f_t = b_t << c_t;
1922 S4 g_t = b_t >> e_t;
1923 S0 a_t = f_t | g_t;
1925 where B is the element bitsize of the type.
1927 Output:
1929 * TYPE_OUT: The type of the output of this pattern.
1931 * Return value: A new stmt that will be used to replace the rotate
1932 S0 stmt. */
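/* As a concrete illustration (example values only), for an unsigned
   8-bit element type and a left rotate, B is 8 and the sequence
   computes

     e_t = (-c_t) & 7;
     a_t = (b_t << c_t) | (b_t >> e_t);

   e.g. b_t = 0x96 and c_t = 3 give (0xb0 | 0x04) = 0xb4, which is 0x96
   rotated left by 3, assuming c_t is already in [0, B - 1].  */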
1934 static gimple *
1935 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1937 gimple *last_stmt = stmt_vinfo->stmt;
1938 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1939 gimple *pattern_stmt, *def_stmt;
1940 enum tree_code rhs_code;
1941 vec_info *vinfo = stmt_vinfo->vinfo;
1942 enum vect_def_type dt;
1943 optab optab1, optab2;
1944 edge ext_def = NULL;
1946 if (!is_gimple_assign (last_stmt))
1947 return NULL;
1949 rhs_code = gimple_assign_rhs_code (last_stmt);
1950 switch (rhs_code)
1952 case LROTATE_EXPR:
1953 case RROTATE_EXPR:
1954 break;
1955 default:
1956 return NULL;
1959 lhs = gimple_assign_lhs (last_stmt);
1960 oprnd0 = gimple_assign_rhs1 (last_stmt);
1961 type = TREE_TYPE (oprnd0);
1962 oprnd1 = gimple_assign_rhs2 (last_stmt);
1963 if (TREE_CODE (oprnd0) != SSA_NAME
1964 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1965 || !INTEGRAL_TYPE_P (type)
1966 || !TYPE_UNSIGNED (type))
1967 return NULL;
1969 stmt_vec_info def_stmt_info;
1970 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
1971 return NULL;
1973 if (dt != vect_internal_def
1974 && dt != vect_constant_def
1975 && dt != vect_external_def)
1976 return NULL;
1978 vectype = get_vectype_for_scalar_type (type);
1979 if (vectype == NULL_TREE)
1980 return NULL;
1982 /* If vector/vector or vector/scalar rotate is supported by the target,
1983 don't do anything here. */
1984 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1985 if (optab1
1986 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1987 return NULL;
1989 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1991 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1992 if (optab2
1993 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1994 return NULL;
1997 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1998 don't do anything here either. */
1999 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2000 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2001 if (!optab1
2002 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2003 || !optab2
2004 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2006 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2007 return NULL;
2008 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2009 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2010 if (!optab1
2011 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2012 || !optab2
2013 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2014 return NULL;
2017 *type_out = vectype;
2019 if (dt == vect_external_def
2020 && TREE_CODE (oprnd1) == SSA_NAME)
2021 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2023 def = NULL_TREE;
2024 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2025 if (TREE_CODE (oprnd1) == INTEGER_CST
2026 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2027 def = oprnd1;
2028 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2030 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2031 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2032 && TYPE_PRECISION (TREE_TYPE (rhs1))
2033 == TYPE_PRECISION (type))
2034 def = rhs1;
2037 if (def == NULL_TREE)
2039 def = vect_recog_temp_ssa_var (type, NULL);
2040 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2041 if (ext_def)
2043 basic_block new_bb
2044 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2045 gcc_assert (!new_bb);
2047 else
2048 append_pattern_def_seq (stmt_vinfo, def_stmt);
2050 stype = TREE_TYPE (def);
2051 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2053 if (TREE_CODE (def) == INTEGER_CST)
2055 if (!tree_fits_uhwi_p (def)
2056 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2057 || integer_zerop (def))
2058 return NULL;
2059 def2 = build_int_cst (stype,
2060 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2062 else
2064 tree vecstype = get_vectype_for_scalar_type (stype);
2066 if (vecstype == NULL_TREE)
2067 return NULL;
2068 def2 = vect_recog_temp_ssa_var (stype, NULL);
2069 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2070 if (ext_def)
2072 basic_block new_bb
2073 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2074 gcc_assert (!new_bb);
2076 else
2077 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2079 def2 = vect_recog_temp_ssa_var (stype, NULL);
2080 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2081 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2082 gimple_assign_lhs (def_stmt), mask);
2083 if (ext_def)
2085 basic_block new_bb
2086 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2087 gcc_assert (!new_bb);
2089 else
2090 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2093 var1 = vect_recog_temp_ssa_var (type, NULL);
2094 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2095 ? LSHIFT_EXPR : RSHIFT_EXPR,
2096 oprnd0, def);
2097 append_pattern_def_seq (stmt_vinfo, def_stmt);
2099 var2 = vect_recog_temp_ssa_var (type, NULL);
2100 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2101 ? RSHIFT_EXPR : LSHIFT_EXPR,
2102 oprnd0, def2);
2103 append_pattern_def_seq (stmt_vinfo, def_stmt);
2105 /* Pattern detected. */
2106 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2108 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2109 var = vect_recog_temp_ssa_var (type, NULL);
2110 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2112 return pattern_stmt;
2115 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2116 vectorized:
2118 type a_t;
2119 TYPE b_T, res_T;
2121 S1 a_t = ;
2122 S2 b_T = ;
2123 S3 res_T = b_T op a_t;
2125 where type 'TYPE' is a type with a different size from 'type',
2126 and op is <<, >> or rotate.
2128 Also detect cases:
2130 type a_t;
2131 TYPE b_T, c_T, res_T;
2133 S0 c_T = ;
2134 S1 a_t = (type) c_T;
2135 S2 b_T = ;
2136 S3 res_T = b_T op a_t;
2138 Input/Output:
2140 * STMT_VINFO: The stmt from which the pattern search begins,
2141 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2142 with a shift/rotate which has the same type on both operands: in the
2143 second case just b_T op c_T, in the first case with an added cast
2144 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2146 Output:
2148 * TYPE_OUT: The type of the output of this pattern.
2150 * Return value: A new stmt that will be used to replace the shift/rotate
2151 S3 stmt. */
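/* For illustration (names follow the comment above), with

     unsigned char a_t;
     int b_T, res_T;

   the statement res_T = b_T << a_t has a shift amount of a different
   (here narrower) mode than b_T; the pattern def sequence adds a cast
   of a_t to the type of b_T so that both operands of the vectorized
   shift share one vector type.  */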
2153 static gimple *
2154 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2155 tree *type_out)
2157 gimple *last_stmt = stmt_vinfo->stmt;
2158 tree oprnd0, oprnd1, lhs, var;
2159 gimple *pattern_stmt;
2160 enum tree_code rhs_code;
2161 vec_info *vinfo = stmt_vinfo->vinfo;
2163 if (!is_gimple_assign (last_stmt))
2164 return NULL;
2166 rhs_code = gimple_assign_rhs_code (last_stmt);
2167 switch (rhs_code)
2169 case LSHIFT_EXPR:
2170 case RSHIFT_EXPR:
2171 case LROTATE_EXPR:
2172 case RROTATE_EXPR:
2173 break;
2174 default:
2175 return NULL;
2178 lhs = gimple_assign_lhs (last_stmt);
2179 oprnd0 = gimple_assign_rhs1 (last_stmt);
2180 oprnd1 = gimple_assign_rhs2 (last_stmt);
2181 if (TREE_CODE (oprnd0) != SSA_NAME
2182 || TREE_CODE (oprnd1) != SSA_NAME
2183 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2184 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2185 || TYPE_PRECISION (TREE_TYPE (lhs))
2186 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2187 return NULL;
2189 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2190 if (!def_vinfo)
2191 return NULL;
2193 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2194 if (*type_out == NULL_TREE)
2195 return NULL;
2197 tree def = NULL_TREE;
2198 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2199 if (def_stmt && gimple_assign_cast_p (def_stmt))
2201 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2202 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2203 && TYPE_PRECISION (TREE_TYPE (rhs1))
2204 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2206 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2207 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2208 def = rhs1;
2209 else
2211 tree mask
2212 = build_low_bits_mask (TREE_TYPE (rhs1),
2213 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2214 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2215 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2216 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2217 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2222 if (def == NULL_TREE)
2224 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2225 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2226 append_pattern_def_seq (stmt_vinfo, def_stmt);
2229 /* Pattern detected. */
2230 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2232 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2233 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2234 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2236 return pattern_stmt;
2239 /* Return true iff the target has a vector optab implementing the operation
2240 CODE on type VECTYPE. */
2242 static bool
2243 target_has_vecop_for_code (tree_code code, tree vectype)
2245 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2246 return voptab
2247 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2250 /* Verify that the target has optabs of VECTYPE to perform all the steps
2251 needed by the multiplication-by-immediate synthesis algorithm described by
2252 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2253 present. Return true iff the target supports all the steps. */
2255 static bool
2256 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2257 tree vectype, bool synth_shift_p)
2259 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2260 return false;
2262 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2263 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2265 if (var == negate_variant
2266 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2267 return false;
2269 /* If we must synthesize shifts with additions, make sure that vector
2270 addition is available. */
2271 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2272 return false;
2274 for (int i = 1; i < alg->ops; i++)
2276 switch (alg->op[i])
2278 case alg_shift:
2279 break;
2280 case alg_add_t_m2:
2281 case alg_add_t2_m:
2282 case alg_add_factor:
2283 if (!supports_vplus)
2284 return false;
2285 break;
2286 case alg_sub_t_m2:
2287 case alg_sub_t2_m:
2288 case alg_sub_factor:
2289 if (!supports_vminus)
2290 return false;
2291 break;
2292 case alg_unknown:
2293 case alg_m:
2294 case alg_zero:
2295 case alg_impossible:
2296 return false;
2297 default:
2298 gcc_unreachable ();
2302 return true;
2305 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2306 putting the final result in DEST. Append all statements but the last into
2307 VINFO. Return the last statement. */
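/* For example, a shift by 3 becomes the chain

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   i.e. AMNT doubling steps, with all but the final statement appended
   to the pattern sequence and the final one returned.  */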
2309 static gimple *
2310 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2311 stmt_vec_info vinfo)
2313 HOST_WIDE_INT i;
2314 tree itype = TREE_TYPE (op);
2315 tree prev_res = op;
2316 gcc_assert (amnt >= 0);
2317 for (i = 0; i < amnt; i++)
2319 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2320 : dest;
2321 gimple *stmt
2322 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2323 prev_res = tmp_var;
2324 if (i < amnt - 1)
2325 append_pattern_def_seq (vinfo, stmt);
2326 else
2327 return stmt;
2329 gcc_unreachable ();
2330 return NULL;
2333 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2334 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2335 the process if necessary. Append the resulting assignment statements
2336 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2337 result of the binary operation. If SYNTH_SHIFT_P is true, synthesize
2338 left shifts using additions. */
2340 static tree
2341 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2342 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2344 if (integer_zerop (op2)
2345 && (code == LSHIFT_EXPR
2346 || code == PLUS_EXPR))
2348 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2349 return op1;
2352 gimple *stmt;
2353 tree itype = TREE_TYPE (op1);
2354 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2356 if (code == LSHIFT_EXPR
2357 && synth_shift_p)
2359 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2360 stmt_vinfo);
2361 append_pattern_def_seq (stmt_vinfo, stmt);
2362 return tmp_var;
2365 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2366 append_pattern_def_seq (stmt_vinfo, stmt);
2367 return tmp_var;
2370 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2371 and simple arithmetic operations to be vectorized. Record the statements
2372 produced in STMT_VINFO and return the last statement in the sequence or
2373 NULL if it's not possible to synthesize such a multiplication.
2374 This function mirrors the behavior of expand_mult_const in expmed.c but
2375 works on tree-ssa form. */
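/* For example (the exact decomposition is whatever choose_mult_variant
   picks), a multiplication by 9 can be synthesized as

     t1 = op << 3;
     res = t1 + op;

   and a multiplication by 15 as

     t1 = op << 4;
     res = t1 - op;

   with the shifts themselves built from additions if the target has no
   vector shift.  */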
2377 static gimple *
2378 vect_synth_mult_by_constant (tree op, tree val,
2379 stmt_vec_info stmt_vinfo)
2381 tree itype = TREE_TYPE (op);
2382 machine_mode mode = TYPE_MODE (itype);
2383 struct algorithm alg;
2384 mult_variant variant;
2385 if (!tree_fits_shwi_p (val))
2386 return NULL;
2388 /* Multiplication synthesis by shifts, adds and subs can introduce
2389 signed overflow where the original operation didn't. Perform the
2390 operations on an unsigned type and cast back to avoid this.
2391 In the future we may want to relax this for synthesis algorithms
2392 that we can prove do not cause unexpected overflow. */
2393 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2395 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2397 /* Targets that don't support vector shifts but support vector additions
2398 can synthesize shifts that way. */
2399 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2401 HOST_WIDE_INT hwval = tree_to_shwi (val);
2402 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2403 The vectorizer's benefit analysis will decide whether it's beneficial
2404 to do this. */
2405 bool possible = choose_mult_variant (mode, hwval, &alg,
2406 &variant, MAX_COST);
2407 if (!possible)
2408 return NULL;
2410 tree vectype = get_vectype_for_scalar_type (multtype);
2412 if (!vectype
2413 || !target_supports_mult_synth_alg (&alg, variant,
2414 vectype, synth_shift_p))
2415 return NULL;
2417 tree accumulator;
2419 /* Clear out the sequence of statements so we can populate it below. */
2420 gimple *stmt = NULL;
2422 if (cast_to_unsigned_p)
2424 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2425 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2426 append_pattern_def_seq (stmt_vinfo, stmt);
2427 op = tmp_op;
2430 if (alg.op[0] == alg_zero)
2431 accumulator = build_int_cst (multtype, 0);
2432 else
2433 accumulator = op;
2435 bool needs_fixup = (variant == negate_variant)
2436 || (variant == add_variant);
2438 for (int i = 1; i < alg.ops; i++)
2440 tree shft_log = build_int_cst (multtype, alg.log[i]);
2441 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2442 tree tmp_var = NULL_TREE;
2444 switch (alg.op[i])
2446 case alg_shift:
2447 if (synth_shift_p)
2448 stmt
2449 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2450 stmt_vinfo);
2451 else
2452 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2453 shft_log);
2454 break;
2455 case alg_add_t_m2:
2456 tmp_var
2457 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2458 stmt_vinfo, synth_shift_p);
2459 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2460 tmp_var);
2461 break;
2462 case alg_sub_t_m2:
2463 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2464 shft_log, stmt_vinfo,
2465 synth_shift_p);
2466 /* In some algorithms the first step involves zeroing the
2467 accumulator. If subtracting from such an accumulator,
2468 just emit the negation directly. */
2469 if (integer_zerop (accumulator))
2470 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2471 else
2472 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2473 tmp_var);
2474 break;
2475 case alg_add_t2_m:
2476 tmp_var
2477 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2478 stmt_vinfo, synth_shift_p);
2479 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2480 break;
2481 case alg_sub_t2_m:
2482 tmp_var
2483 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2484 stmt_vinfo, synth_shift_p);
2485 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2486 break;
2487 case alg_add_factor:
2488 tmp_var
2489 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2490 stmt_vinfo, synth_shift_p);
2491 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2492 tmp_var);
2493 break;
2494 case alg_sub_factor:
2495 tmp_var
2496 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2497 stmt_vinfo, synth_shift_p);
2498 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2499 accumulator);
2500 break;
2501 default:
2502 gcc_unreachable ();
2504 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2505 but rather return it directly. */
2507 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2508 append_pattern_def_seq (stmt_vinfo, stmt);
2509 accumulator = accum_tmp;
2511 if (variant == negate_variant)
2513 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2514 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2515 accumulator = accum_tmp;
2516 if (cast_to_unsigned_p)
2517 append_pattern_def_seq (stmt_vinfo, stmt);
2519 else if (variant == add_variant)
2521 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2522 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2523 accumulator = accum_tmp;
2524 if (cast_to_unsigned_p)
2525 append_pattern_def_seq (stmt_vinfo, stmt);
2527 /* Move back to a signed type if needed. */
2528 if (cast_to_unsigned_p)
2530 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2531 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2534 return stmt;
2537 /* Detect multiplication by a constant and convert it into a sequence of
2538 shifts and additions, subtractions, negations. We reuse the
2539 choose_mult_variant algorithms from expmed.c.
2541 Input/Output:
2543 STMT_VINFO: The stmt from which the pattern search begins,
2544 i.e. the mult stmt.
2546 Output:
2548 * TYPE_OUT: The type of the output of this pattern.
2550 * Return value: A new stmt that will be used to replace
2551 the multiplication. */
2553 static gimple *
2554 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2556 gimple *last_stmt = stmt_vinfo->stmt;
2557 tree oprnd0, oprnd1, vectype, itype;
2558 gimple *pattern_stmt;
2560 if (!is_gimple_assign (last_stmt))
2561 return NULL;
2563 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2564 return NULL;
2566 oprnd0 = gimple_assign_rhs1 (last_stmt);
2567 oprnd1 = gimple_assign_rhs2 (last_stmt);
2568 itype = TREE_TYPE (oprnd0);
2570 if (TREE_CODE (oprnd0) != SSA_NAME
2571 || TREE_CODE (oprnd1) != INTEGER_CST
2572 || !INTEGRAL_TYPE_P (itype)
2573 || !type_has_mode_precision_p (itype))
2574 return NULL;
2576 vectype = get_vectype_for_scalar_type (itype);
2577 if (vectype == NULL_TREE)
2578 return NULL;
2580 /* If the target can handle vectorized multiplication natively,
2581 don't attempt to optimize this. */
2582 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2583 if (mul_optab != unknown_optab)
2585 machine_mode vec_mode = TYPE_MODE (vectype);
2586 int icode = (int) optab_handler (mul_optab, vec_mode);
2587 if (icode != CODE_FOR_nothing)
2588 return NULL;
2591 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2592 if (!pattern_stmt)
2593 return NULL;
2595 /* Pattern detected. */
2596 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2598 *type_out = vectype;
2600 return pattern_stmt;
2603 /* Detect a signed division by a constant that wouldn't be
2604 otherwise vectorized:
2606 type a_t, b_t;
2608 S1 a_t = b_t / N;
2610 where type 'type' is an integral type and N is a constant.
2612 Similarly handle modulo by a constant:
2614 S4 a_t = b_t % N;
2616 Input/Output:
2618 * STMT_VINFO: The stmt from which the pattern search begins,
2619 i.e. the division stmt. If N is a power-of-two constant and the
2620 type is signed, S1 is replaced by:
2621 S3 y_t = b_t < 0 ? N - 1 : 0;
2622 S2 x_t = b_t + y_t;
2623 S1' a_t = x_t >> log2 (N);
2625 S4 is replaced, if N is a power-of-two constant and the
2626 type is signed, by (where *_T temporaries have unsigned type):
2627 S9 y_T = b_t < 0 ? -1U : 0U;
2628 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2629 S7 z_t = (type) z_T;
2630 S6 w_t = b_t + z_t;
2631 S5 x_t = w_t & (N - 1);
2632 S4' a_t = x_t - z_t;
2634 Output:
2636 * TYPE_OUT: The type of the output of this pattern.
2638 * Return value: A new stmt that will be used to replace the division
2639 S1 or modulo S4 stmt. */
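/* As a worked example of the power-of-two case (illustrative values),
   for a signed int b_t and N = 4 the division becomes

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   so b_t = -7 gives x_t = -4 and a_t = -1, matching truncating
   division, while b_t = 7 gives a_t = 1.  */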
2641 static gimple *
2642 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2644 gimple *last_stmt = stmt_vinfo->stmt;
2645 tree oprnd0, oprnd1, vectype, itype, cond;
2646 gimple *pattern_stmt, *def_stmt;
2647 enum tree_code rhs_code;
2648 optab optab;
2649 tree q;
2650 int dummy_int, prec;
2652 if (!is_gimple_assign (last_stmt))
2653 return NULL;
2655 rhs_code = gimple_assign_rhs_code (last_stmt);
2656 switch (rhs_code)
2658 case TRUNC_DIV_EXPR:
2659 case EXACT_DIV_EXPR:
2660 case TRUNC_MOD_EXPR:
2661 break;
2662 default:
2663 return NULL;
2666 oprnd0 = gimple_assign_rhs1 (last_stmt);
2667 oprnd1 = gimple_assign_rhs2 (last_stmt);
2668 itype = TREE_TYPE (oprnd0);
2669 if (TREE_CODE (oprnd0) != SSA_NAME
2670 || TREE_CODE (oprnd1) != INTEGER_CST
2671 || TREE_CODE (itype) != INTEGER_TYPE
2672 || !type_has_mode_precision_p (itype))
2673 return NULL;
2675 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2676 vectype = get_vectype_for_scalar_type (itype);
2677 if (vectype == NULL_TREE)
2678 return NULL;
2680 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2682 /* If the target can handle vectorized division or modulo natively,
2683 don't attempt to optimize this, since native division is likely
2684 to give smaller code. */
2685 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2686 if (optab != unknown_optab)
2688 machine_mode vec_mode = TYPE_MODE (vectype);
2689 int icode = (int) optab_handler (optab, vec_mode);
2690 if (icode != CODE_FOR_nothing)
2691 return NULL;
2695 prec = TYPE_PRECISION (itype);
2696 if (integer_pow2p (oprnd1))
2698 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2699 return NULL;
2701 /* Pattern detected. */
2702 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2704 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2705 build_int_cst (itype, 0));
2706 if (rhs_code == TRUNC_DIV_EXPR
2707 || rhs_code == EXACT_DIV_EXPR)
2709 tree var = vect_recog_temp_ssa_var (itype, NULL);
2710 tree shift;
2711 def_stmt
2712 = gimple_build_assign (var, COND_EXPR, cond,
2713 fold_build2 (MINUS_EXPR, itype, oprnd1,
2714 build_int_cst (itype, 1)),
2715 build_int_cst (itype, 0));
2716 append_pattern_def_seq (stmt_vinfo, def_stmt);
2717 var = vect_recog_temp_ssa_var (itype, NULL);
2718 def_stmt
2719 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2720 gimple_assign_lhs (def_stmt));
2721 append_pattern_def_seq (stmt_vinfo, def_stmt);
2723 shift = build_int_cst (itype, tree_log2 (oprnd1));
2724 pattern_stmt
2725 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2726 RSHIFT_EXPR, var, shift);
2728 else
2730 tree signmask;
2731 if (compare_tree_int (oprnd1, 2) == 0)
2733 signmask = vect_recog_temp_ssa_var (itype, NULL);
2734 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2735 build_int_cst (itype, 1),
2736 build_int_cst (itype, 0));
2737 append_pattern_def_seq (stmt_vinfo, def_stmt);
2739 else
2741 tree utype
2742 = build_nonstandard_integer_type (prec, 1);
2743 tree vecutype = get_vectype_for_scalar_type (utype);
2744 tree shift
2745 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2746 - tree_log2 (oprnd1));
2747 tree var = vect_recog_temp_ssa_var (utype, NULL);
2749 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2750 build_int_cst (utype, -1),
2751 build_int_cst (utype, 0));
2752 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2753 var = vect_recog_temp_ssa_var (utype, NULL);
2754 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2755 gimple_assign_lhs (def_stmt),
2756 shift);
2757 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2758 signmask = vect_recog_temp_ssa_var (itype, NULL);
2759 def_stmt
2760 = gimple_build_assign (signmask, NOP_EXPR, var);
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2763 def_stmt
2764 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2765 PLUS_EXPR, oprnd0, signmask);
2766 append_pattern_def_seq (stmt_vinfo, def_stmt);
2767 def_stmt
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2769 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2770 fold_build2 (MINUS_EXPR, itype, oprnd1,
2771 build_int_cst (itype, 1)));
2772 append_pattern_def_seq (stmt_vinfo, def_stmt);
2774 pattern_stmt
2775 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2776 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2777 signmask);
2780 *type_out = vectype;
2781 return pattern_stmt;
2784 if (prec > HOST_BITS_PER_WIDE_INT
2785 || integer_zerop (oprnd1))
2786 return NULL;
2788 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2789 return NULL;
2791 if (TYPE_UNSIGNED (itype))
2793 unsigned HOST_WIDE_INT mh, ml;
2794 int pre_shift, post_shift;
2795 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2796 & GET_MODE_MASK (itype_mode));
2797 tree t1, t2, t3, t4;
2799 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2800 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2801 return NULL;
2803 /* Find a suitable multiplier and right shift count
2804 instead of multiplying with D. */
2805 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2807 /* If the suggested multiplier is more than SIZE bits, we can do better
2808 for even divisors, using an initial right shift. */
2809 if (mh != 0 && (d & 1) == 0)
2811 pre_shift = ctz_or_zero (d);
2812 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2813 &ml, &post_shift, &dummy_int);
2814 gcc_assert (!mh);
2816 else
2817 pre_shift = 0;
2819 if (mh != 0)
2821 if (post_shift - 1 >= prec)
2822 return NULL;
2824 /* t1 = oprnd0 h* ml;
2825 t2 = oprnd0 - t1;
2826 t3 = t2 >> 1;
2827 t4 = t1 + t3;
2828 q = t4 >> (post_shift - 1); */
2829 t1 = vect_recog_temp_ssa_var (itype, NULL);
2830 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2831 build_int_cst (itype, ml));
2832 append_pattern_def_seq (stmt_vinfo, def_stmt);
2834 t2 = vect_recog_temp_ssa_var (itype, NULL);
2835 def_stmt
2836 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2837 append_pattern_def_seq (stmt_vinfo, def_stmt);
2839 t3 = vect_recog_temp_ssa_var (itype, NULL);
2840 def_stmt
2841 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2842 append_pattern_def_seq (stmt_vinfo, def_stmt);
2844 t4 = vect_recog_temp_ssa_var (itype, NULL);
2845 def_stmt
2846 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2848 if (post_shift != 1)
2850 append_pattern_def_seq (stmt_vinfo, def_stmt);
2852 q = vect_recog_temp_ssa_var (itype, NULL);
2853 pattern_stmt
2854 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2855 build_int_cst (itype, post_shift - 1));
2857 else
2859 q = t4;
2860 pattern_stmt = def_stmt;
2863 else
2865 if (pre_shift >= prec || post_shift >= prec)
2866 return NULL;
2868 /* t1 = oprnd0 >> pre_shift;
2869 t2 = t1 h* ml;
2870 q = t2 >> post_shift; */
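/* For instance (multiplier values shown for illustration), dividing an
   8-bit unsigned value by 3 gives pre_shift = 0, ml = 171 and
   post_shift = 1, i.e. q = (t1 h* 171) >> 1: for oprnd0 = 100,
   100 * 171 = 17100, its high byte is 66 and 66 >> 1 = 33 = 100 / 3.  */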
2871 if (pre_shift)
2873 t1 = vect_recog_temp_ssa_var (itype, NULL);
2874 def_stmt
2875 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2876 build_int_cst (NULL, pre_shift));
2877 append_pattern_def_seq (stmt_vinfo, def_stmt);
2879 else
2880 t1 = oprnd0;
2882 t2 = vect_recog_temp_ssa_var (itype, NULL);
2883 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2884 build_int_cst (itype, ml));
2886 if (post_shift)
2888 append_pattern_def_seq (stmt_vinfo, def_stmt);
2890 q = vect_recog_temp_ssa_var (itype, NULL);
2891 def_stmt
2892 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2893 build_int_cst (itype, post_shift));
2895 else
2896 q = t2;
2898 pattern_stmt = def_stmt;
2901 else
2903 unsigned HOST_WIDE_INT ml;
2904 int post_shift;
2905 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2906 unsigned HOST_WIDE_INT abs_d;
2907 bool add = false;
2908 tree t1, t2, t3, t4;
2910 /* Give up for -1. */
2911 if (d == -1)
2912 return NULL;
2914 /* Since d might be INT_MIN, we have to cast to
2915 unsigned HOST_WIDE_INT before negating to avoid
2916 undefined signed overflow. */
2917 abs_d = (d >= 0
2918 ? (unsigned HOST_WIDE_INT) d
2919 : - (unsigned HOST_WIDE_INT) d);
2921 /* n rem d = n rem -d */
2922 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2924 d = abs_d;
2925 oprnd1 = build_int_cst (itype, abs_d);
2927 else if (HOST_BITS_PER_WIDE_INT >= prec
2928 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2929 /* This case is not handled correctly below. */
2930 return NULL;
2932 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2933 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2935 add = true;
2936 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2938 if (post_shift >= prec)
2939 return NULL;
2941 /* t1 = oprnd0 h* ml; */
2942 t1 = vect_recog_temp_ssa_var (itype, NULL);
2943 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2944 build_int_cst (itype, ml));
2946 if (add)
2948 /* t2 = t1 + oprnd0; */
2949 append_pattern_def_seq (stmt_vinfo, def_stmt);
2950 t2 = vect_recog_temp_ssa_var (itype, NULL);
2951 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2953 else
2954 t2 = t1;
2956 if (post_shift)
2958 /* t3 = t2 >> post_shift; */
2959 append_pattern_def_seq (stmt_vinfo, def_stmt);
2960 t3 = vect_recog_temp_ssa_var (itype, NULL);
2961 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2962 build_int_cst (itype, post_shift));
2964 else
2965 t3 = t2;
2967 wide_int oprnd0_min, oprnd0_max;
2968 int msb = 1;
2969 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2971 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2972 msb = 0;
2973 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2974 msb = -1;
2977 if (msb == 0 && d >= 0)
2979 /* q = t3; */
2980 q = t3;
2981 pattern_stmt = def_stmt;
2983 else
2985 /* t4 = oprnd0 >> (prec - 1);
2986 or if we know from VRP that oprnd0 >= 0
2987 t4 = 0;
2988 or if we know from VRP that oprnd0 < 0
2989 t4 = -1; */
2990 append_pattern_def_seq (stmt_vinfo, def_stmt);
2991 t4 = vect_recog_temp_ssa_var (itype, NULL);
2992 if (msb != 1)
2993 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2994 build_int_cst (itype, msb));
2995 else
2996 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2997 build_int_cst (itype, prec - 1));
2998 append_pattern_def_seq (stmt_vinfo, def_stmt);
3000 /* q = t3 - t4; or q = t4 - t3; */
3001 q = vect_recog_temp_ssa_var (itype, NULL);
3002 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3003 d < 0 ? t3 : t4);
3007 if (rhs_code == TRUNC_MOD_EXPR)
3009 tree r, t1;
3011 /* We divided. Now finish by:
3012 t1 = q * oprnd1;
3013 r = oprnd0 - t1; */
3014 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3016 t1 = vect_recog_temp_ssa_var (itype, NULL);
3017 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3018 append_pattern_def_seq (stmt_vinfo, def_stmt);
3020 r = vect_recog_temp_ssa_var (itype, NULL);
3021 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3024 /* Pattern detected. */
3025 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3027 *type_out = vectype;
3028 return pattern_stmt;
3031 /* Function vect_recog_mixed_size_cond_pattern
3033 Try to find the following pattern:
3035 type x_t, y_t;
3036 TYPE a_T, b_T, c_T;
3037 loop:
3038 S1 a_T = x_t CMP y_t ? b_T : c_T;
3040 where type 'TYPE' is an integral type which has a different size
3041 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3042 than 'type', the constants need to fit into an integer type
3043 with the same width as 'type') or results of conversion from 'type'.
3045 Input:
3047 * STMT_VINFO: The stmt from which the pattern search begins.
3049 Output:
3051 * TYPE_OUT: The type of the output of this pattern.
3053 * Return value: A new stmt that will be used to replace the pattern.
3054 Additionally a def_stmt is added.
3056 a_it = x_t CMP y_t ? b_it : c_it;
3057 a_T = (TYPE) a_it; */
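/* For illustration (types chosen arbitrarily), with

     short x_t, y_t;
     int a_T = x_t < y_t ? 17 : 42;

   the comparison works on short elements, so the COND_EXPR is rewritten
   to produce a short-sized result which is then widened:

     a_it = x_t < y_t ? 17 : 42;
     a_T = (int) a_it;  */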
3059 static gimple *
3060 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3062 gimple *last_stmt = stmt_vinfo->stmt;
3063 tree cond_expr, then_clause, else_clause;
3064 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3065 gimple *pattern_stmt, *def_stmt;
3066 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3067 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3068 bool promotion;
3069 tree comp_scalar_type;
3071 if (!is_gimple_assign (last_stmt)
3072 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3073 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3074 return NULL;
3076 cond_expr = gimple_assign_rhs1 (last_stmt);
3077 then_clause = gimple_assign_rhs2 (last_stmt);
3078 else_clause = gimple_assign_rhs3 (last_stmt);
3080 if (!COMPARISON_CLASS_P (cond_expr))
3081 return NULL;
3083 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3084 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3085 if (comp_vectype == NULL_TREE)
3086 return NULL;
3088 type = gimple_expr_type (last_stmt);
3089 if (types_compatible_p (type, comp_scalar_type)
3090 || ((TREE_CODE (then_clause) != INTEGER_CST
3091 || TREE_CODE (else_clause) != INTEGER_CST)
3092 && !INTEGRAL_TYPE_P (comp_scalar_type))
3093 || !INTEGRAL_TYPE_P (type))
3094 return NULL;
3096 if ((TREE_CODE (then_clause) != INTEGER_CST
3097 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3098 &def_stmt0, &promotion))
3099 || (TREE_CODE (else_clause) != INTEGER_CST
3100 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3101 &def_stmt1, &promotion)))
3102 return NULL;
3104 if (orig_type0 && orig_type1
3105 && !types_compatible_p (orig_type0, orig_type1))
3106 return NULL;
3108 if (orig_type0)
3110 if (!types_compatible_p (orig_type0, comp_scalar_type))
3111 return NULL;
3112 then_clause = gimple_assign_rhs1 (def_stmt0);
3113 itype = orig_type0;
3116 if (orig_type1)
3118 if (!types_compatible_p (orig_type1, comp_scalar_type))
3119 return NULL;
3120 else_clause = gimple_assign_rhs1 (def_stmt1);
3121 itype = orig_type1;
3125 HOST_WIDE_INT cmp_mode_size
3126 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3128 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3129 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3130 return NULL;
3132 vectype = get_vectype_for_scalar_type (type);
3133 if (vectype == NULL_TREE)
3134 return NULL;
3136 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3137 return NULL;
3139 if (itype == NULL_TREE)
3140 itype = build_nonstandard_integer_type (cmp_mode_size,
3141 TYPE_UNSIGNED (type));
3143 if (itype == NULL_TREE
3144 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3145 return NULL;
3147 vecitype = get_vectype_for_scalar_type (itype);
3148 if (vecitype == NULL_TREE)
3149 return NULL;
3151 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3152 return NULL;
3154 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3156 if ((TREE_CODE (then_clause) == INTEGER_CST
3157 && !int_fits_type_p (then_clause, itype))
3158 || (TREE_CODE (else_clause) == INTEGER_CST
3159 && !int_fits_type_p (else_clause, itype)))
3160 return NULL;
3163 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3164 COND_EXPR, unshare_expr (cond_expr),
3165 fold_convert (itype, then_clause),
3166 fold_convert (itype, else_clause));
3167 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3168 NOP_EXPR, gimple_assign_lhs (def_stmt));
3170 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3171 *type_out = vectype;
3173 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3175 return pattern_stmt;
3179 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3180 true if bool VAR can and should be optimized that way. Assume it shouldn't
3181 in case it's a result of a comparison which can be directly vectorized into
3182 a vector comparison. Fills in STMTS with all stmts visited during the
3183 walk. */
3185 static bool
3186 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3188 tree rhs1;
3189 enum tree_code rhs_code;
3191 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3192 if (!def_stmt_info)
3193 return false;
3195 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3196 if (!def_stmt)
3197 return false;
3199 if (stmts.contains (def_stmt))
3200 return true;
3202 rhs1 = gimple_assign_rhs1 (def_stmt);
3203 rhs_code = gimple_assign_rhs_code (def_stmt);
3204 switch (rhs_code)
3206 case SSA_NAME:
3207 if (! check_bool_pattern (rhs1, vinfo, stmts))
3208 return false;
3209 break;
3211 CASE_CONVERT:
3212 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3213 return false;
3214 if (! check_bool_pattern (rhs1, vinfo, stmts))
3215 return false;
3216 break;
3218 case BIT_NOT_EXPR:
3219 if (! check_bool_pattern (rhs1, vinfo, stmts))
3220 return false;
3221 break;
3223 case BIT_AND_EXPR:
3224 case BIT_IOR_EXPR:
3225 case BIT_XOR_EXPR:
3226 if (! check_bool_pattern (rhs1, vinfo, stmts)
3227 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3228 return false;
3229 break;
3231 default:
3232 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3234 tree vecitype, comp_vectype;
3236 /* If the comparison can throw, then is_gimple_condexpr will be
3237 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3238 if (stmt_could_throw_p (cfun, def_stmt))
3239 return false;
3241 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3242 if (comp_vectype == NULL_TREE)
3243 return false;
3245 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3246 if (mask_type
3247 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3248 return false;
3250 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3252 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3253 tree itype
3254 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3255 vecitype = get_vectype_for_scalar_type (itype);
3256 if (vecitype == NULL_TREE)
3257 return false;
3259 else
3260 vecitype = comp_vectype;
3261 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3262 return false;
3264 else
3265 return false;
3266 break;
3269 bool res = stmts.add (def_stmt);
3270 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3271 gcc_assert (!res);
3273 return true;
3277 /* Helper function of adjust_bool_pattern. Add a cast of VAR (defined by
3278 a previous stmt) to TYPE, appending the cast stmt to STMT_INFO's
3279 pattern definition sequence, and return the result of the cast. */
3281 static tree
3282 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3284 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3285 NOP_EXPR, var);
3286 append_pattern_def_seq (stmt_info, cast_stmt,
3287 get_vectype_for_scalar_type (type));
3288 return gimple_assign_lhs (cast_stmt);
3291 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3292 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3293 type, OUT_TYPE is the desired final integer type of the whole pattern.
3294 STMT_INFO is the info of the pattern root and is where pattern stmts should
3295 be associated with. DEFS is a map of pattern defs. */
3297 static void
3298 adjust_bool_pattern (tree var, tree out_type,
3299 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3301 gimple *stmt = SSA_NAME_DEF_STMT (var);
3302 enum tree_code rhs_code, def_rhs_code;
3303 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3304 location_t loc;
3305 gimple *pattern_stmt, *def_stmt;
3306 tree trueval = NULL_TREE;
3308 rhs1 = gimple_assign_rhs1 (stmt);
3309 rhs2 = gimple_assign_rhs2 (stmt);
3310 rhs_code = gimple_assign_rhs_code (stmt);
3311 loc = gimple_location (stmt);
3312 switch (rhs_code)
3314 case SSA_NAME:
3315 CASE_CONVERT:
3316 irhs1 = *defs.get (rhs1);
3317 itype = TREE_TYPE (irhs1);
3318 pattern_stmt
3319 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3320 SSA_NAME, irhs1);
3321 break;
3323 case BIT_NOT_EXPR:
3324 irhs1 = *defs.get (rhs1);
3325 itype = TREE_TYPE (irhs1);
3326 pattern_stmt
3327 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3328 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3329 break;
3331 case BIT_AND_EXPR:
3332 /* Try to optimize x = y & (a < b ? 1 : 0); into
3333 x = (a < b ? y : 0);
3335 E.g. for:
3336 bool a_b, b_b, c_b;
3337 TYPE d_T;
3339 S1 a_b = x1 CMP1 y1;
3340 S2 b_b = x2 CMP2 y2;
3341 S3 c_b = a_b & b_b;
3342 S4 d_T = (TYPE) c_b;
3344 we would normally emit:
3346 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3347 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3348 S3' c_T = a_T & b_T;
3349 S4' d_T = c_T;
3351 but we can save one stmt by using the
3352 result of one of the COND_EXPRs in the other COND_EXPR and leave
3353 BIT_AND_EXPR stmt out:
3355 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3356 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3357 S4' d_T = c_T;
3359 At least when VEC_COND_EXPR is implemented using masks,
3360 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3361 computes the comparison mask and ANDs it, in one case with an
3362 all-ones vector, in the other case with a vector register.
3363 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3364 often more expensive. */
3365 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3366 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3367 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3369 irhs1 = *defs.get (rhs1);
3370 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3371 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3372 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3374 rhs_code = def_rhs_code;
3375 rhs1 = def_rhs1;
3376 rhs2 = gimple_assign_rhs2 (def_stmt);
3377 trueval = irhs1;
3378 goto do_compare;
3380 else
3381 irhs2 = *defs.get (rhs2);
3382 goto and_ior_xor;
3384 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3385 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3386 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3388 irhs2 = *defs.get (rhs2);
3389 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3390 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3391 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3393 rhs_code = def_rhs_code;
3394 rhs1 = def_rhs1;
3395 rhs2 = gimple_assign_rhs2 (def_stmt);
3396 trueval = irhs2;
3397 goto do_compare;
3399 else
3400 irhs1 = *defs.get (rhs1);
3401 goto and_ior_xor;
3403 /* FALLTHRU */
3404 case BIT_IOR_EXPR:
3405 case BIT_XOR_EXPR:
3406 irhs1 = *defs.get (rhs1);
3407 irhs2 = *defs.get (rhs2);
3408 and_ior_xor:
3409 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3410 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3412 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3413 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3414 int out_prec = TYPE_PRECISION (out_type);
3415 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3416 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3417 stmt_info);
3418 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3419 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3420 stmt_info);
3421 else
3423 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3424 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3427 itype = TREE_TYPE (irhs1);
3428 pattern_stmt
3429 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3430 rhs_code, irhs1, irhs2);
3431 break;
3433 default:
3434 do_compare:
3435 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3436 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3437 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3438 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3439 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3441 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3442 itype
3443 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3445 else
3446 itype = TREE_TYPE (rhs1);
3447 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3448 if (trueval == NULL_TREE)
3449 trueval = build_int_cst (itype, 1);
3450 else
3451 gcc_checking_assert (useless_type_conversion_p (itype,
3452 TREE_TYPE (trueval)));
3453 pattern_stmt
3454 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3455 COND_EXPR, cond_expr, trueval,
3456 build_int_cst (itype, 0));
3457 break;
3460 gimple_set_location (pattern_stmt, loc);
3461 append_pattern_def_seq (stmt_info, pattern_stmt,
3462 get_vectype_for_scalar_type (itype));
3463 defs.put (var, gimple_assign_lhs (pattern_stmt));
3466 /* Comparison function to qsort a vector of gimple stmts by UID. */
3468 static int
3469 sort_after_uid (const void *p1, const void *p2)
3471 const gimple *stmt1 = *(const gimple * const *)p1;
3472 const gimple *stmt2 = *(const gimple * const *)p2;
3473 return gimple_uid (stmt1) - gimple_uid (stmt2);
3476 /* Create pattern stmts for all stmts participating in the bool pattern
3477 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3478 OUT_TYPE. Return the def of the pattern root. */
3480 static tree
3481 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3482 tree out_type, stmt_vec_info stmt_info)
3484 /* Gather original stmts in the bool pattern in their order of appearance
3485 in the IL. */
3486 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3487 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3488 i != bool_stmt_set.end (); ++i)
3489 bool_stmts.quick_push (*i);
3490 bool_stmts.qsort (sort_after_uid);
3492 /* Now process them in that order, producing pattern stmts. */
3493 hash_map <tree, tree> defs;
3494 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3495 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3496 out_type, stmt_info, defs);
3498 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3499 gimple *pattern_stmt
3500 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3501 return gimple_assign_lhs (pattern_stmt);
3504 /* Helper for search_type_for_mask. */
3506 static tree
3507 search_type_for_mask_1 (tree var, vec_info *vinfo,
3508 hash_map<gimple *, tree> &cache)
3510 tree rhs1;
3511 enum tree_code rhs_code;
3512 tree res = NULL_TREE, res2;
3514 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3515 return NULL_TREE;
3517 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3518 if (!def_stmt_info)
3519 return NULL_TREE;
3521 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3522 if (!def_stmt)
3523 return NULL_TREE;
3525 tree *c = cache.get (def_stmt);
3526 if (c)
3527 return *c;
3529 rhs_code = gimple_assign_rhs_code (def_stmt);
3530 rhs1 = gimple_assign_rhs1 (def_stmt);
3532 switch (rhs_code)
3534 case SSA_NAME:
3535 case BIT_NOT_EXPR:
3536 CASE_CONVERT:
3537 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3538 break;
3540 case BIT_AND_EXPR:
3541 case BIT_IOR_EXPR:
3542 case BIT_XOR_EXPR:
3543 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3544 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3545 cache);
3546 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3547 res = res2;
3548 break;
3550 default:
3551 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3553 tree comp_vectype, mask_type;
3555 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3557 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3558 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3559 vinfo, cache);
3560 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3561 res = res2;
3562 break;
3565 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3566 if (comp_vectype == NULL_TREE)
3568 res = NULL_TREE;
3569 break;
3572 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3573 if (!mask_type
3574 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3576 res = NULL_TREE;
3577 break;
3580 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3581 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3583 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3584 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3586 else
3587 res = TREE_TYPE (rhs1);
3591 cache.put (def_stmt, res);
3592 return res;
3595 /* Return the proper type for converting bool VAR into
3596 an integer value or NULL_TREE if no such type exists.
3597 The type is chosen so that the converted value has the
3598 same number of elements as VAR's vector type. */
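/* Illustrative sketch (an assumption for exposition, not taken from the
   original sources): given

     bool b1 = (i1 == i2);     e.g. i1, i2 of type int
     bool b2 = (s1 == s2);     e.g. s1, s2 of type short
     bool b3 = b1 & b2;

   search_type_for_mask (b1, vinfo) would presumably yield a 32-bit
   unsigned integer type, search_type_for_mask (b2, vinfo) a 16-bit one,
   and for b3 the narrower of the two operand types is chosen.  */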
3600 static tree
3601 search_type_for_mask (tree var, vec_info *vinfo)
3603 hash_map<gimple *, tree> cache;
3604 return search_type_for_mask_1 (var, vinfo, cache);
3607 /* Function vect_recog_bool_pattern
3609 Try to find a pattern like the following:
3611 bool a_b, b_b, c_b, d_b, e_b;
3612 TYPE f_T;
3613 loop:
3614 S1 a_b = x1 CMP1 y1;
3615 S2 b_b = x2 CMP2 y2;
3616 S3 c_b = a_b & b_b;
3617 S4 d_b = x3 CMP3 y3;
3618 S5 e_b = c_b | d_b;
3619 S6 f_T = (TYPE) e_b;
3621 where type 'TYPE' is an integral type. Or a similar pattern
3622 ending in
3624 S6 f_Y = e_b ? r_Y : s_Y;
3626 as results from if-conversion of a complex condition.
3628 Input:
3630 * STMT_VINFO: The stmt at the end from which the pattern
3631 search begins, i.e. cast of a bool to
3632 an integer type.
3634 Output:
3636 * TYPE_OUT: The type of the output of this pattern.
3638 * Return value: A new stmt that will be used to replace the pattern.
3640 Assuming the size of TYPE is the same as the size of all comparisons
3641 (otherwise some casts would be added where needed), for the above
3642 sequence we create the following related pattern stmts:
3643 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3644 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3645 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3646 S5' e_T = c_T | d_T;
3647 S6' f_T = e_T;
3649 Instead of the above S3' we could emit:
3650 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3651 S3' c_T = a_T | b_T;
3652 but the above is more efficient (one stmt instead of two). */
3654 static gimple *
3655 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3657 gimple *last_stmt = stmt_vinfo->stmt;
3658 enum tree_code rhs_code;
3659 tree var, lhs, rhs, vectype;
3660 vec_info *vinfo = stmt_vinfo->vinfo;
3661 gimple *pattern_stmt;
3663 if (!is_gimple_assign (last_stmt))
3664 return NULL;
3666 var = gimple_assign_rhs1 (last_stmt);
3667 lhs = gimple_assign_lhs (last_stmt);
3669 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3670 return NULL;
3672 hash_set<gimple *> bool_stmts;
3674 rhs_code = gimple_assign_rhs_code (last_stmt);
3675 if (CONVERT_EXPR_CODE_P (rhs_code))
3677 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3678 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3679 return NULL;
3680 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3681 if (vectype == NULL_TREE)
3682 return NULL;
3684 if (check_bool_pattern (var, vinfo, bool_stmts))
3686 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3687 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3688 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3689 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3690 else
3691 pattern_stmt
3692 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3694 else
3696 tree type = search_type_for_mask (var, vinfo);
3697 tree cst0, cst1, tmp;
3699 if (!type)
3700 return NULL;
3702 /* We may use the COND_EXPR directly with a narrowed type, avoiding
3703 multiple COND_EXPRs and the subsequent packing of their results,
3704 and instead perform a single COND_EXPR with a packed mask. In the
3705 widening case it is better to emit the COND_EXPR first and then
3706 extract the results. */
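/* An illustrative example (an assumption, not from the original sources):
   if LHS is an int set from a bool fed by int comparisons, TYPE from
   search_type_for_mask has the same mode as LHS, so a single COND_EXPR
   producing 1/0 directly in TREE_TYPE (lhs) suffices.  If LHS were wider
   (say long long), the COND_EXPR would be emitted in the narrower TYPE
   first as a pattern def and then converted to TREE_TYPE (lhs) below.  */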
3707 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3708 type = TREE_TYPE (lhs);
3710 cst0 = build_int_cst (type, 0);
3711 cst1 = build_int_cst (type, 1);
3712 tmp = vect_recog_temp_ssa_var (type, NULL);
3713 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3715 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3717 tree new_vectype = get_vectype_for_scalar_type (type);
3718 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3720 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3721 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3725 *type_out = vectype;
3726 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3728 return pattern_stmt;
3730 else if (rhs_code == COND_EXPR
3731 && TREE_CODE (var) == SSA_NAME)
3733 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3734 if (vectype == NULL_TREE)
3735 return NULL;
3737 /* Build a scalar type for the boolean result that when
3738 vectorized matches the vector type of the result in
3739 size and number of elements. */
3740 unsigned prec
3741 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3742 TYPE_VECTOR_SUBPARTS (vectype));
3744 tree type
3745 = build_nonstandard_integer_type (prec,
3746 TYPE_UNSIGNED (TREE_TYPE (var)));
3747 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3748 return NULL;
3750 if (!check_bool_pattern (var, vinfo, bool_stmts))
3751 return NULL;
3753 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3755 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3756 pattern_stmt
3757 = gimple_build_assign (lhs, COND_EXPR,
3758 build2 (NE_EXPR, boolean_type_node,
3759 rhs, build_int_cst (type, 0)),
3760 gimple_assign_rhs2 (last_stmt),
3761 gimple_assign_rhs3 (last_stmt));
3762 *type_out = vectype;
3763 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3765 return pattern_stmt;
3767 else if (rhs_code == SSA_NAME
3768 && STMT_VINFO_DATA_REF (stmt_vinfo))
3770 stmt_vec_info pattern_stmt_info;
3771 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3772 gcc_assert (vectype != NULL_TREE);
3773 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3774 return NULL;
3776 if (check_bool_pattern (var, vinfo, bool_stmts))
3777 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3778 else
3780 tree type = search_type_for_mask (var, vinfo);
3781 tree cst0, cst1, new_vectype;
3783 if (!type)
3784 return NULL;
3786 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3787 type = TREE_TYPE (vectype);
3789 cst0 = build_int_cst (type, 0);
3790 cst1 = build_int_cst (type, 1);
3791 new_vectype = get_vectype_for_scalar_type (type);
3793 rhs = vect_recog_temp_ssa_var (type, NULL);
3794 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3795 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3798 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3799 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3801 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3802 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3803 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3804 rhs = rhs2;
3806 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3807 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3808 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3809 *type_out = vectype;
3810 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3812 return pattern_stmt;
3814 else
3815 return NULL;
3819 /* A helper for vect_recog_mask_conversion_pattern. Build
3820 conversion of MASK to a type suitable for masking VECTYPE.
3821 The built statement gets the required vectype and is appended to
3822 STMT_VINFO's pattern definition sequence.
3824 Return converted mask. */
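/* A hedged sketch of the intent (not from the original sources): if MASK
   was computed for, say, double-sized elements but is needed to mask an
   operation on int-sized VECTYPE, this emits

     tmp = (<same-sized truth vector type of VECTYPE>) mask;

   as a pattern def-seq statement and returns TMP, so the converted mask
   has the element layout matching VECTYPE.  */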
3826 static tree
3827 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3829 gimple *stmt;
3830 tree masktype, tmp;
3832 masktype = build_same_sized_truth_vector_type (vectype);
3833 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3834 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3835 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3837 return tmp;
3841 /* Function vect_recog_mask_conversion_pattern
3843 Try to find statements which require boolean type
3844 conversion. Additional conversion statements are
3845 added to handle such cases. For example:
3847 bool m_1, m_2, m_3;
3848 int i_4, i_5;
3849 double d_6, d_7;
3850 char c_1, c_2, c_3;
3852 S1 m_1 = i_4 > i_5;
3853 S2 m_2 = d_6 < d_7;
3854 S3 m_3 = m_1 & m_2;
3855 S4 c_1 = m_3 ? c_2 : c_3;
3857 Will be transformed into:
3859 S1 m_1 = i_4 > i_5;
3860 S2 m_2 = d_6 < d_7;
3861 S3'' m_2' = (_Bool[bitsize=32])m_2
3862 S3' m_3' = m_1 & m_2';
3863 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3864 S4' c_1' = m_3'' ? c_2 : c_3; */
3866 static gimple *
3867 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3869 gimple *last_stmt = stmt_vinfo->stmt;
3870 enum tree_code rhs_code;
3871 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3872 tree vectype1, vectype2;
3873 stmt_vec_info pattern_stmt_info;
3874 vec_info *vinfo = stmt_vinfo->vinfo;
3876 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3877 if (is_gimple_call (last_stmt)
3878 && gimple_call_internal_p (last_stmt))
3880 gcall *pattern_stmt;
3882 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3883 int mask_argno = internal_fn_mask_index (ifn);
3884 if (mask_argno < 0)
3885 return NULL;
3887 bool store_p = internal_store_fn_p (ifn);
3888 if (store_p)
3890 int rhs_index = internal_fn_stored_value_index (ifn);
3891 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3892 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3894 else
3896 lhs = gimple_call_lhs (last_stmt);
3897 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3900 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3901 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3902 if (!mask_arg_type)
3903 return NULL;
3904 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3906 if (!vectype1 || !vectype2
3907 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3908 TYPE_VECTOR_SUBPARTS (vectype2)))
3909 return NULL;
3911 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3913 auto_vec<tree, 8> args;
3914 unsigned int nargs = gimple_call_num_args (last_stmt);
3915 args.safe_grow (nargs);
3916 for (unsigned int i = 0; i < nargs; ++i)
3917 args[i] = ((int) i == mask_argno
3918 ? tmp
3919 : gimple_call_arg (last_stmt, i));
3920 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3922 if (!store_p)
3924 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3925 gimple_call_set_lhs (pattern_stmt, lhs);
3927 gimple_call_set_nothrow (pattern_stmt, true);
3929 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3930 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3931 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3933 *type_out = vectype1;
3934 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3936 return pattern_stmt;
3939 if (!is_gimple_assign (last_stmt))
3940 return NULL;
3942 gimple *pattern_stmt;
3943 lhs = gimple_assign_lhs (last_stmt);
3944 rhs1 = gimple_assign_rhs1 (last_stmt);
3945 rhs_code = gimple_assign_rhs_code (last_stmt);
3947 /* Check for cond expression requiring mask conversion. */
3948 if (rhs_code == COND_EXPR)
3950 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3952 if (TREE_CODE (rhs1) == SSA_NAME)
3954 rhs1_type = search_type_for_mask (rhs1, vinfo);
3955 if (!rhs1_type)
3956 return NULL;
3958 else if (COMPARISON_CLASS_P (rhs1))
3960 /* Check whether we're comparing scalar booleans and (if so)
3961 whether a better mask type exists than the mask associated
3962 with boolean-sized elements. This avoids unnecessary packs
3963 and unpacks if the booleans are set from comparisons of
3964 wider types. E.g. in:
3966 int x1, x2, x3, x4, y1, y2;
3968 bool b1 = (x1 == x2);
3969 bool b2 = (x3 == x4);
3970 ... = b1 == b2 ? y1 : y2;
3972 it is better for b1 and b2 to use the mask type associated
3973 with int elements rather than bool (byte) elements. */
3974 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3975 if (!rhs1_type)
3976 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3978 else
3979 return NULL;
3981 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3983 if (!vectype1 || !vectype2)
3984 return NULL;
3986 /* Continue if a conversion is needed. Also continue if we have
3987 a comparison whose vector type would normally be different from
3988 VECTYPE2 when considered in isolation. In that case we'll
3989 replace the comparison with an SSA name (so that we can record
3990 its vector type) and behave as though the comparison was an SSA
3991 name from the outset. */
3992 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3993 TYPE_VECTOR_SUBPARTS (vectype2))
3994 && (TREE_CODE (rhs1) == SSA_NAME
3995 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
3996 return NULL;
3998 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
3999 in place; we can handle it in vectorizable_condition. This avoids
4000 unnecessary promotion stmts and an increased vectorization factor. */
4001 if (COMPARISON_CLASS_P (rhs1)
4002 && INTEGRAL_TYPE_P (rhs1_type)
4003 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4004 TYPE_VECTOR_SUBPARTS (vectype2)))
4006 enum vect_def_type dt;
4007 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4008 && dt == vect_external_def
4009 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4010 && (dt == vect_external_def
4011 || dt == vect_constant_def))
4013 tree wide_scalar_type = build_nonstandard_integer_type
4014 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4015 TYPE_UNSIGNED (rhs1_type));
4016 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4017 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4018 return NULL;
4022 /* If rhs1 is a comparison we need to move it into a
4023 separate statement. */
4024 if (TREE_CODE (rhs1) != SSA_NAME)
4026 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4027 pattern_stmt = gimple_build_assign (tmp, rhs1);
4028 rhs1 = tmp;
4029 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4032 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4033 TYPE_VECTOR_SUBPARTS (vectype2)))
4034 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4035 else
4036 tmp = rhs1;
4038 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4039 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4040 gimple_assign_rhs2 (last_stmt),
4041 gimple_assign_rhs3 (last_stmt));
4043 *type_out = vectype1;
4044 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4046 return pattern_stmt;
4049 /* Now check for binary boolean operations requiring conversion for
4050 one of the operands. */
4051 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4052 return NULL;
4054 if (rhs_code != BIT_IOR_EXPR
4055 && rhs_code != BIT_XOR_EXPR
4056 && rhs_code != BIT_AND_EXPR
4057 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4058 return NULL;
4060 rhs2 = gimple_assign_rhs2 (last_stmt);
4062 rhs1_type = search_type_for_mask (rhs1, vinfo);
4063 rhs2_type = search_type_for_mask (rhs2, vinfo);
4065 if (!rhs1_type || !rhs2_type
4066 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4067 return NULL;
4069 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4071 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4072 if (!vectype1)
4073 return NULL;
4074 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4076 else
4078 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4079 if (!vectype1)
4080 return NULL;
4081 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4084 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4085 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4087 *type_out = vectype1;
4088 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4090 return pattern_stmt;
4093 /* STMT_INFO is a load or store. If the load or store is conditional, return
4094 the boolean condition under which it occurs, otherwise return null. */
4096 static tree
4097 vect_get_load_store_mask (stmt_vec_info stmt_info)
4099 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4101 gcc_assert (gimple_assign_single_p (def_assign));
4102 return NULL_TREE;
4105 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4107 internal_fn ifn = gimple_call_internal_fn (def_call);
4108 int mask_index = internal_fn_mask_index (ifn);
4109 return gimple_call_arg (def_call, mask_index);
4112 gcc_unreachable ();
4115 /* Return the scalar offset type that an internal gather/scatter function
4116 should use. GS_INFO describes the gather/scatter operation. */
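/* For example (illustrative): with 32-bit offsets and 64-bit elements
   (element_bits 64 > offset_bits 32) this returns a 64-bit integer type
   whose signedness matches the original offset type, so the offset can
   be widened to the element width before the gather/scatter.  */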
4118 static tree
4119 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4121 tree offset_type = TREE_TYPE (gs_info->offset);
4122 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4124 /* Enforced by vect_check_gather_scatter. */
4125 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4126 gcc_assert (element_bits >= offset_bits);
4128 /* If the offset is narrower than the elements, extend it according
4129 to its sign. */
4130 if (element_bits > offset_bits)
4131 return build_nonstandard_integer_type (element_bits,
4132 TYPE_UNSIGNED (offset_type));
4134 return offset_type;
4137 /* Return MASK if MASK is suitable for masking an operation on vectors
4138 of type VECTYPE, otherwise convert it into such a form and return
4139 the result. Associate any conversion statements with STMT_INFO's
4140 pattern. */
4142 static tree
4143 vect_convert_mask_for_vectype (tree mask, tree vectype,
4144 stmt_vec_info stmt_info, vec_info *vinfo)
4146 tree mask_type = search_type_for_mask (mask, vinfo);
4147 if (mask_type)
4149 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4150 if (mask_vectype
4151 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4152 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4153 mask = build_mask_conversion (mask, vectype, stmt_info);
4155 return mask;
4158 /* Return the equivalent of:
4160 fold_convert (TYPE, VALUE)
4162 with the expectation that the operation will be vectorized.
4163 If new statements are needed, add them as pattern statements
4164 to STMT_INFO. */
4166 static tree
4167 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4169 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4170 return value;
4172 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4173 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4174 append_pattern_def_seq (stmt_info, conversion,
4175 get_vectype_for_scalar_type (type));
4176 return new_value;
4179 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4180 internal function. Return the final statement on success and set
4181 *TYPE_OUT to the vector type being loaded or stored.
4183 This function only handles gathers and scatters that were recognized
4184 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
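/* As an illustration (assumed scalar source, not from the original
   sources), a load such as

     for (i = 0; i < n; ++i)
       sum += data[idx[i]];

   would have been analyzed as a gather, and this function rewrites it
   into a call to the corresponding internal function (gs_info.ifn, e.g.
   IFN_GATHER_LOAD, or its masked variant if the access is conditional);
   stores are handled analogously with IFN_SCATTER_STORE /
   IFN_MASK_SCATTER_STORE.  */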
4186 static gimple *
4187 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4189 /* Currently we only support this for loop vectorization. */
4190 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4191 if (!loop_vinfo)
4192 return NULL;
4194 /* Make sure that we're looking at a gather load or scatter store. */
4195 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4196 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4197 return NULL;
4199 /* Get the boolean that controls whether the load or store happens.
4200 This is null if the operation is unconditional. */
4201 tree mask = vect_get_load_store_mask (stmt_info);
4203 /* Make sure that the target supports an appropriate internal
4204 function for the gather/scatter operation. */
4205 gather_scatter_info gs_info;
4206 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4207 || gs_info.decl)
4208 return NULL;
4210 /* Convert the mask to the right form. */
4211 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4212 if (mask)
4213 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4214 loop_vinfo);
4216 /* Get the invariant base and non-invariant offset, converting the
4217 latter to the same width as the vector elements. */
4218 tree base = gs_info.base;
4219 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4220 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4221 stmt_info);
4223 /* Build the new pattern statement. */
4224 tree scale = size_int (gs_info.scale);
4225 gcall *pattern_stmt;
4226 if (DR_IS_READ (dr))
4228 if (mask != NULL)
4229 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4230 offset, scale, mask);
4231 else
4232 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4233 offset, scale);
4234 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4235 gimple_call_set_lhs (pattern_stmt, load_lhs);
4237 else
4239 tree rhs = vect_get_store_rhs (stmt_info);
4240 if (mask != NULL)
4241 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4242 base, offset, scale, rhs,
4243 mask);
4244 else
4245 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4246 base, offset, scale, rhs);
4248 gimple_call_set_nothrow (pattern_stmt, true);
4250 /* Copy across relevant vectorization info and associate DR with the
4251 new pattern statement instead of the original statement. */
4252 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4253 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4255 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4256 *type_out = vectype;
4257 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4259 return pattern_stmt;
4262 /* Return true if TYPE is a non-boolean integer type. These are the types
4263 that we want to consider for narrowing. */
4265 static bool
4266 vect_narrowable_type_p (tree type)
4268 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4271 /* Return true if the operation given by CODE can be truncated to N bits
4272 when only N bits of the output are needed. This is only true if bit N+1
4273 of the inputs has no effect on the low N bits of the result. */
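/* For example, PLUS_EXPR is truncatable: the low N bits of a + b depend
   only on the low N bits of a and b.  RSHIFT_EXPR or TRUNC_DIV_EXPR are
   not, since higher input bits can affect the low bits of the result,
   so they fall through to the "false" default below.  */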
4275 static bool
4276 vect_truncatable_operation_p (tree_code code)
4278 switch (code)
4280 case PLUS_EXPR:
4281 case MINUS_EXPR:
4282 case MULT_EXPR:
4283 case BIT_AND_EXPR:
4284 case BIT_IOR_EXPR:
4285 case BIT_XOR_EXPR:
4286 case COND_EXPR:
4287 return true;
4289 default:
4290 return false;
4294 /* Record that STMT_INFO could be changed from operating on TYPE to
4295 operating on a type with the precision and sign given by PRECISION
4296 and SIGN respectively. PRECISION is an arbitrary bit precision;
4297 it might not be a whole number of bytes. */
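/* For instance (illustrative), a requirement of 12 bits would be rounded
   up by vect_element_precision below, presumably to 16 bits, before being
   recorded as the operation precision.  */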
4299 static void
4300 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4301 unsigned int precision, signop sign)
4303 /* Round the precision up to a whole number of bytes. */
4304 precision = vect_element_precision (precision);
4305 if (precision < TYPE_PRECISION (type)
4306 && (!stmt_info->operation_precision
4307 || stmt_info->operation_precision > precision))
4309 stmt_info->operation_precision = precision;
4310 stmt_info->operation_sign = sign;
4314 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4315 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4316 is an arbitrary bit precision; it might not be a whole number of bytes. */
4318 static void
4319 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4320 unsigned int min_input_precision)
4322 /* This operation in isolation only requires the inputs to have
4323 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4324 that MIN_INPUT_PRECISION is a natural precision for the chain
4325 as a whole. E.g. consider something like:
4327 unsigned short *x, *y;
4328 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4330 The right shift can be done on unsigned chars, and only requires the
4331 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4332 approach would mean turning a natural chain of single-vector unsigned
4333 short operations into one that truncates "*x" and then extends
4334 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4335 operation and one vector for each unsigned char operation.
4336 This would be a significant pessimization.
4338 Instead only propagate the maximum of this precision and the precision
4339 required by the users of the result. This means that we don't pessimize
4340 the case above but continue to optimize things like:
4342 unsigned char *y;
4343 unsigned short *x;
4344 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4346 Here we would truncate two vectors of *x to a single vector of
4347 unsigned chars and use single-vector unsigned char operations for
4348 everything else, rather than doing two unsigned short copies of
4349 "(*x & 0xf0) >> 4" and then truncating the result. */
4350 min_input_precision = MAX (min_input_precision,
4351 stmt_info->min_output_precision);
4353 if (min_input_precision < TYPE_PRECISION (type)
4354 && (!stmt_info->min_input_precision
4355 || stmt_info->min_input_precision > min_input_precision))
4356 stmt_info->min_input_precision = min_input_precision;
4359 /* Subroutine of vect_determine_min_output_precision. Return true if
4360 we can calculate a reduced number of output bits for STMT_INFO,
4361 whose result is LHS. */
4363 static bool
4364 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4366 /* Take the maximum precision required by users of the result. */
4367 vec_info *vinfo = stmt_info->vinfo;
4368 unsigned int precision = 0;
4369 imm_use_iterator iter;
4370 use_operand_p use;
4371 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4373 gimple *use_stmt = USE_STMT (use);
4374 if (is_gimple_debug (use_stmt))
4375 continue;
4376 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4377 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4378 return false;
4379 /* The input precision recorded for COND_EXPRs applies only to the
4380 "then" and "else" values. */
4381 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4382 if (assign
4383 && gimple_assign_rhs_code (assign) == COND_EXPR
4384 && use->use != gimple_assign_rhs2_ptr (assign)
4385 && use->use != gimple_assign_rhs3_ptr (assign))
4386 return false;
4387 precision = MAX (precision, use_stmt_info->min_input_precision);
4390 if (dump_enabled_p ())
4391 dump_printf_loc (MSG_NOTE, vect_location,
4392 "only the low %d bits of %T are significant\n",
4393 precision, lhs);
4394 stmt_info->min_output_precision = precision;
4395 return true;
4398 /* Calculate min_output_precision for STMT_INFO. */
4400 static void
4401 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4403 /* We're only interested in statements with a narrowable result. */
4404 tree lhs = gimple_get_lhs (stmt_info->stmt);
4405 if (!lhs
4406 || TREE_CODE (lhs) != SSA_NAME
4407 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4408 return;
4410 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4411 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4414 /* Use range information to decide whether STMT (described by STMT_INFO)
4415 could be done in a narrower type. This is effectively a forward
4416 propagation, since it uses context-independent information that applies
4417 to all users of an SSA name. */
4419 static void
4420 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4422 tree lhs = gimple_assign_lhs (stmt);
4423 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4424 return;
4426 tree type = TREE_TYPE (lhs);
4427 if (!vect_narrowable_type_p (type))
4428 return;
4430 /* First see whether we have any useful range information for the result. */
4431 unsigned int precision = TYPE_PRECISION (type);
4432 signop sign = TYPE_SIGN (type);
4433 wide_int min_value, max_value;
4434 if (!vect_get_range_info (lhs, &min_value, &max_value))
4435 return;
4437 tree_code code = gimple_assign_rhs_code (stmt);
4438 unsigned int nops = gimple_num_ops (stmt);
4440 if (!vect_truncatable_operation_p (code))
4441 /* Check that all relevant input operands are compatible, and update
4442 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4443 for (unsigned int i = 1; i < nops; ++i)
4445 tree op = gimple_op (stmt, i);
4446 if (TREE_CODE (op) == INTEGER_CST)
4448 /* Don't require the integer to have TYPE (which it might
4449 not for things like shift amounts, etc.), but do require it
4450 to fit the type. */
4451 if (!int_fits_type_p (op, type))
4452 return;
4454 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4455 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4457 else if (TREE_CODE (op) == SSA_NAME)
4459 /* Ignore codes that don't take uniform arguments. */
4460 if (!types_compatible_p (TREE_TYPE (op), type))
4461 return;
4463 wide_int op_min_value, op_max_value;
4464 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4465 return;
4467 min_value = wi::min (min_value, op_min_value, sign);
4468 max_value = wi::max (max_value, op_max_value, sign);
4470 else
4471 return;
4474 /* Try to switch signed types for unsigned types if we can.
4475 This is better for two reasons. First, unsigned ops tend
4476 to be cheaper than signed ops. Second, it means that we can
4477 handle things like:
4479 signed char c;
4480 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4484 signed char c;
4485 unsigned short res_1 = (unsigned short) c & 0xff00;
4486 int res = (int) res_1;
4488 where the intermediate result res_1 has unsigned rather than
4489 signed type. */
4490 if (sign == SIGNED && !wi::neg_p (min_value))
4491 sign = UNSIGNED;
4493 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4494 unsigned int precision1 = wi::min_precision (min_value, sign);
4495 unsigned int precision2 = wi::min_precision (max_value, sign);
4496 unsigned int value_precision = MAX (precision1, precision2);
4497 if (value_precision >= precision)
4498 return;
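/* Continuing the example above (an illustrative sketch): for
   res = (int) c & 0xff00 with range [0x0000, 0xff00], SIGN switches to
   UNSIGNED and the precision required for the range works out to 16,
   which is less than 32, so the statement is narrowed to unsigned:16.  */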
4500 if (dump_enabled_p ())
4501 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4502 " without loss of precision: %G",
4503 sign == SIGNED ? "signed" : "unsigned",
4504 value_precision, stmt);
4506 vect_set_operation_type (stmt_info, type, value_precision, sign);
4507 vect_set_min_input_precision (stmt_info, type, value_precision);
4510 /* Use information about the users of STMT's result to decide whether
4511 STMT (described by STMT_INFO) could be done in a narrower type.
4512 This is effectively a backward propagation. */
4514 static void
4515 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4517 tree_code code = gimple_assign_rhs_code (stmt);
4518 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4519 tree type = TREE_TYPE (gimple_op (stmt, opno));
4520 if (!vect_narrowable_type_p (type))
4521 return;
4523 unsigned int precision = TYPE_PRECISION (type);
4524 unsigned int operation_precision, min_input_precision;
4525 switch (code)
4527 CASE_CONVERT:
4528 /* Only the bits that contribute to the output matter. Don't change
4529 the precision of the operation itself. */
4530 operation_precision = precision;
4531 min_input_precision = stmt_info->min_output_precision;
4532 break;
4534 case LSHIFT_EXPR:
4535 case RSHIFT_EXPR:
4537 tree shift = gimple_assign_rhs2 (stmt);
4538 if (TREE_CODE (shift) != INTEGER_CST
4539 || !wi::ltu_p (wi::to_widest (shift), precision))
4540 return;
4541 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4542 if (code == LSHIFT_EXPR)
4544 /* We need CONST_SHIFT fewer bits of the input. */
4545 operation_precision = stmt_info->min_output_precision;
4546 min_input_precision = (MAX (operation_precision, const_shift)
4547 - const_shift);
4549 else
4551 /* We need CONST_SHIFT extra bits to do the operation. */
4552 operation_precision = (stmt_info->min_output_precision
4553 + const_shift);
4554 min_input_precision = operation_precision;
4556 break;
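/* A worked example (illustrative): if only the low 8 bits of x >> 4 are
   needed, the shift must be performed in at least 8 + 4 = 12 bits and
   needs 12 input bits; if only the low 8 bits of x << 4 are needed, the
   shift itself can be done in 8 bits and only MAX (8, 4) - 4 = 4 low
   bits of x matter.  */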
4559 default:
4560 if (vect_truncatable_operation_p (code))
4562 /* Input bit N has no effect on output bits N-1 and lower. */
4563 operation_precision = stmt_info->min_output_precision;
4564 min_input_precision = operation_precision;
4565 break;
4567 return;
4570 if (operation_precision < precision)
4572 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4574 " without affecting users: %G",
4575 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4576 operation_precision, stmt);
4577 vect_set_operation_type (stmt_info, type, operation_precision,
4578 TYPE_SIGN (type));
4580 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4583 /* Handle vect_determine_precisions for STMT_INFO, given that we
4584 have already done so for the users of its result. */
4586 void
4587 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4589 vect_determine_min_output_precision (stmt_info);
4590 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4592 vect_determine_precisions_from_range (stmt_info, stmt);
4593 vect_determine_precisions_from_users (stmt_info, stmt);
4597 /* Walk backwards through the vectorizable region to determine the
4598 values of these fields:
4600 - min_output_precision
4601 - min_input_precision
4602 - operation_precision
4603 - operation_sign. */
4605 void
4606 vect_determine_precisions (vec_info *vinfo)
4608 DUMP_VECT_SCOPE ("vect_determine_precisions");
4610 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4612 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4613 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4614 unsigned int nbbs = loop->num_nodes;
4616 for (unsigned int i = 0; i < nbbs; i++)
4618 basic_block bb = bbs[nbbs - i - 1];
4619 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4620 !gsi_end_p (si); gsi_prev (&si))
4621 vect_determine_stmt_precisions
4622 (vinfo->lookup_stmt (gsi_stmt (si)));
4625 else
4627 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4628 gimple_stmt_iterator si = bb_vinfo->region_end;
4629 gimple *stmt;
4632 if (!gsi_stmt (si))
4633 si = gsi_last_bb (bb_vinfo->bb);
4634 else
4635 gsi_prev (&si);
4636 stmt = gsi_stmt (si);
4637 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4638 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4639 vect_determine_stmt_precisions (stmt_info);
4641 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4645 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4647 struct vect_recog_func
4649 vect_recog_func_ptr fn;
4650 const char *name;
4653 /* Note that ordering matters - the first pattern matching on a stmt is
4654 taken, which means that usually the more complex pattern needs to precede
4655 the less complex ones (widen_sum only after dot_prod or sad for example). */
4656 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4657 { vect_recog_over_widening_pattern, "over_widening" },
4658 /* Must come after over_widening, which narrows the shift as much as
4659 possible beforehand. */
4660 { vect_recog_average_pattern, "average" },
4661 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4662 { vect_recog_widen_mult_pattern, "widen_mult" },
4663 { vect_recog_dot_prod_pattern, "dot_prod" },
4664 { vect_recog_sad_pattern, "sad" },
4665 { vect_recog_widen_sum_pattern, "widen_sum" },
4666 { vect_recog_pow_pattern, "pow" },
4667 { vect_recog_widen_shift_pattern, "widen_shift" },
4668 { vect_recog_rotate_pattern, "rotate" },
4669 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4670 { vect_recog_divmod_pattern, "divmod" },
4671 { vect_recog_mult_pattern, "mult" },
4672 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4673 { vect_recog_bool_pattern, "bool" },
4674 /* This must come before mask conversion, and includes the parts
4675 of mask conversion that are needed for gather and scatter
4676 internal functions. */
4677 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4678 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4681 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4683 /* Mark statements that are involved in a pattern. */
4685 static inline void
4686 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4687 tree pattern_vectype)
4689 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4691 gimple *orig_pattern_stmt = NULL;
4692 if (is_pattern_stmt_p (orig_stmt_info))
4694 /* We're replacing a statement in an existing pattern definition
4695 sequence. */
4696 orig_pattern_stmt = orig_stmt_info->stmt;
4697 if (dump_enabled_p ())
4698 dump_printf_loc (MSG_NOTE, vect_location,
4699 "replacing earlier pattern %G", orig_pattern_stmt);
4701 /* To keep the book-keeping simple, just swap the lhs of the
4702 old and new statements, so that the old one has a valid but
4703 unused lhs. */
4704 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4705 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4706 gimple_set_lhs (pattern_stmt, old_lhs);
4708 if (dump_enabled_p ())
4709 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
4711 /* Switch to the statement that ORIG replaces. */
4712 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4714 /* We shouldn't be replacing the main pattern statement. */
4715 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4716 != orig_pattern_stmt);
4719 if (def_seq)
4720 for (gimple_stmt_iterator si = gsi_start (def_seq);
4721 !gsi_end_p (si); gsi_next (&si))
4722 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4724 if (orig_pattern_stmt)
4726 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4728 /* Insert all the new pattern statements before the original one. */
4729 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4730 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4731 orig_def_seq);
4732 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4733 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4735 /* Remove the pattern statement that this new pattern replaces. */
4736 gsi_remove (&gsi, false);
4738 else
4739 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4742 /* Function vect_pattern_recog_1
4744 Input:
4745 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4746 computation pattern.
4747 STMT_INFO: A stmt from which the pattern search should start.
4749 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4750 a sequence of statements that has the same functionality and can be
4751 used to replace STMT_INFO. It returns the last statement in the sequence
4752 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4753 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4754 statement, having first checked that the target supports the new operation
4755 in that type.
4757 This function also does some bookkeeping, as explained in the documentation
4758 for vect_pattern_recog. */
4760 static void
4761 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4763 vec_info *vinfo = stmt_info->vinfo;
4764 gimple *pattern_stmt;
4765 loop_vec_info loop_vinfo;
4766 tree pattern_vectype;
4768 /* If this statement has already been replaced with pattern statements,
4769 leave the original statement alone, since the first match wins.
4770 Instead try to match against the definition statements that feed
4771 the main pattern statement. */
4772 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4774 gimple_stmt_iterator gsi;
4775 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4776 !gsi_end_p (gsi); gsi_next (&gsi))
4777 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4778 return;
4781 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4782 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4783 if (!pattern_stmt)
4785 /* Clear any half-formed pattern definition sequence. */
4786 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4787 return;
4790 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4791 gcc_assert (pattern_vectype);
4793 /* Found a vectorizable pattern. */
4794 if (dump_enabled_p ())
4795 dump_printf_loc (MSG_NOTE, vect_location,
4796 "%s pattern recognized: %G",
4797 recog_func->name, pattern_stmt);
4799 /* Mark the stmts that are involved in the pattern. */
4800 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4802 /* Patterns cannot be vectorized using SLP, because they change the order of
4803 computation. */
4804 if (loop_vinfo)
4806 unsigned ix, ix2;
4807 stmt_vec_info *elem_ptr;
4808 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4809 elem_ptr, *elem_ptr == stmt_info);
4814 /* Function vect_pattern_recog
4816 Input:
4817 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
4818 computation idioms.
4820 Output - for each computation idiom that is detected we create a new stmt
4821 that provides the same functionality and that can be vectorized. We
4822 also record some information in the stmt_vec_info of the relevant
4823 stmts, as explained below:
4825 At the entry to this function we have the following stmts, with the
4826 following initial value in the STMT_VINFO fields:
4828 stmt in_pattern_p related_stmt vec_stmt
4829 S1: a_i = .... - - -
4830 S2: a_2 = ..use(a_i).. - - -
4831 S3: a_1 = ..use(a_2).. - - -
4832 S4: a_0 = ..use(a_1).. - - -
4833 S5: ... = ..use(a_0).. - - -
4835 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4836 represented by a single stmt. We then:
4837 - create a new stmt S6 equivalent to the pattern (the stmt is not
4838 inserted into the code)
4839 - fill in the STMT_VINFO fields as follows:
4841 in_pattern_p related_stmt vec_stmt
4842 S1: a_i = .... - - -
4843 S2: a_2 = ..use(a_i).. - - -
4844 S3: a_1 = ..use(a_2).. - - -
4845 S4: a_0 = ..use(a_1).. true S6 -
4846 '---> S6: a_new = .... - S4 -
4847 S5: ... = ..use(a_0).. - - -
4849 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4850 to each other through the RELATED_STMT field).
4852 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4853 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4854 remain irrelevant unless used by stmts other than S4.
4856 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4857 (because they are marked as irrelevant). It will vectorize S6, and record
4858 a pointer to the new vector stmt VS6 from S6 (as usual).
4859 S4 will be skipped, and S5 will be vectorized as usual:
4861 in_pattern_p related_stmt vec_stmt
4862 S1: a_i = .... - - -
4863 S2: a_2 = ..use(a_i).. - - -
4864 S3: a_1 = ..use(a_2).. - - -
4865 > VS6: va_new = .... - - -
4866 S4: a_0 = ..use(a_1).. true S6 VS6
4867 '---> S6: a_new = .... - S4 VS6
4868 > VS5: ... = ..vuse(va_new).. - - -
4869 S5: ... = ..use(a_0).. - - -
4871 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4872 elsewhere), and we'll end up with:
4874 VS6: va_new = ....
4875 VS5: ... = ..vuse(va_new)..
4877 In case of more than one pattern statements, e.g., widen-mult with
4878 intermediate type:
4880 S1 a_t = ;
4881 S2 a_T = (TYPE) a_t;
4882 '--> S3: a_it = (interm_type) a_t;
4883 S4 prod_T = a_T * CONST;
4884 '--> S5: prod_T' = a_it w* CONST;
4886 there may be other users of a_T outside the pattern. In that case S2 will
4887 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4888 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4889 be recorded in S3. */
4891 void
4892 vect_pattern_recog (vec_info *vinfo)
4894 struct loop *loop;
4895 basic_block *bbs;
4896 unsigned int nbbs;
4897 gimple_stmt_iterator si;
4898 unsigned int i, j;
4900 vect_determine_precisions (vinfo);
4902 DUMP_VECT_SCOPE ("vect_pattern_recog");
4904 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4906 loop = LOOP_VINFO_LOOP (loop_vinfo);
4907 bbs = LOOP_VINFO_BBS (loop_vinfo);
4908 nbbs = loop->num_nodes;
4910 /* Scan through the loop stmts, applying the pattern recognition
4911 functions starting at each stmt visited: */
4912 for (i = 0; i < nbbs; i++)
4914 basic_block bb = bbs[i];
4915 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4917 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4918 /* Scan over all generic vect_recog_xxx_pattern functions. */
4919 for (j = 0; j < NUM_PATTERNS; j++)
4920 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4921 stmt_info);
4925 else
4927 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4928 for (si = bb_vinfo->region_begin;
4929 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4931 gimple *stmt = gsi_stmt (si);
4932 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4933 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4934 continue;
4936 /* Scan over all generic vect_recog_xxx_pattern functions. */
4937 for (j = 0; j < NUM_PATTERNS; j++)
4938 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);