Avoid matching the same pattern statement twice
[official-gcc.git] / gcc / tree-vect-patterns.c
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"
/* Report that we've found an instance of the pattern NAME in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }
}
/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO.
   Set its vector type to VECTYPE if it doesn't have one already.  */

static void
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
			tree vectype)
{
  stmt_vec_info pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
					     orig_stmt_info->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info->stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
}
/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
		       tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
}
/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE)
{
  vec_info *vinfo = stmt_info->vinfo;
  if (vectype)
    {
      gcc_assert (!vinfo_for_stmt (new_stmt));
      stmt_vec_info new_stmt_info = new_stmt_vec_info (new_stmt, vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}
/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
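/* For instance, a DOT_PROD_EXPR with "signed char" inputs and an "int"
   result maps to the sdot_prod optab; the handler is looked up by the
   input mode (e.g. V16QI), and operand 0 of the matching instruction then
   determines the required output mode (e.g. V4SI), assuming the target
   defines such a pattern at all.  */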
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}
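/* For example, vect_element_precision rounds a required precision of 12
   bits up to 16, 5 bits up to 8, and anything narrower than BITS_PER_UNIT
   up to a full byte.  */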
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  vect_def_type dt;
  gimple *def_stmt;
  if (TREE_CODE (op) != SSA_NAME
      || !vect_is_simple_use (op, vinfo, &dt, &def_stmt)
      || dt != vect_internal_def)
    return NULL;

  return vinfo_for_stmt (def_stmt);
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
    return false;

  return true;
}
/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
struct vect_unpromoted_value {
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}
/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= orig_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    unprom->set_op (op, dt, caster);
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      if (dt == vect_internal_def)
	caster = vinfo_for_stmt (def_stmt);
      else
	caster = NULL;
      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}
/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}
/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
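/* For example, combining a "signed char" operand with an "unsigned char"
   *COMMON_TYPE requires a 16-bit signed supertype, which is only accepted
   when the result type TYPE is at least 32 bits wide.  */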
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
						     this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo_for_stmt (SSA_NAME_DEF_STMT (this_unprom->op));
	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
					   shift_p, max_nops, this_unprom,
					   common_type);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		return 0;
	    }
	}
      next_op += nops;
    }
  return next_op;
}
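/* As an illustration, vect_recog_dot_prod_pattern below calls this with
   CODE == MULT_EXPR, WIDENED_CODE == WIDEN_MULT_EXPR and MAX_NOPS == 2 on
   a statement such as "prod = (int) x_t * (int) y_t"; the two leaves then
   describe x_t and y_t, and *COMMON_TYPE is their common narrow type.  */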
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  /* See if we can reuse an existing result.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (types_compatible_p (TREE_TYPE (lhs), type))
	return lhs;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (stmt_info, new_stmt, vectype);
  return new_op;
}
/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;
      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
    }
}
/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
		     tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}
/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
	  : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
}

/* As above, but also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, assign))
    return false;

  if (!vect_reassociating_reduction_p (stmt_info))
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  return true;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  tree mult_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
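/* A source-level example of the dot-product idiom, with "signed char *a, *b"
   and an "int" accumulator:

       for (i = 0; i < n; i++)
	 sum += (int) a[i] * (int) b[i];

   On targets that provide the corresponding [us]dot_prod<mode> pattern this
   becomes a single DOT_PROD_EXPR per vector of inputs.  */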
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  DAD = (TYPE2) DAD;
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  tree sad_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
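/* A source-level example of the SAD idiom, with "unsigned char *a, *b"
   and an "int" accumulator:

       for (i = 0; i < n; i++)
	 sum += abs ((int) a[i] - (int) b[i]);

   which matches the S1-S8 sequence above and is replaced by SAD_EXPR when
   the target provides a suitable [us]sad<mode> pattern.  */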
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (vec<gimple *> *stmts, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = stmts->pop ();
  stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (half_type);
  tree vecitype = get_vectype_for_scalar_type (itype);
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (wide_code, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info) = NULL;
  tree oprnd[2];
  vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  stmts->safe_push (last_stmt);
  return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
}
/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts, tree *type_out)
{
  return vect_recog_widen_op_pattern (stmts, type_out, MULT_EXPR,
				      WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}
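/* For example, with "short *a, *b" and "int *c":

       for (i = 0; i < n; i++)
	 c[i] = (int) a[i] * (int) b[i];

   the multiplication can be carried out as WIDEN_MULT_EXPR directly on the
   narrow inputs, assuming the target supports a widening multiply.  */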
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	x = x * x
   or
	x = sqrt (x)
  */

static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && !gimple_call_internal_p (last_stmt))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      new_pattern_def_seq (stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}
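/* For example, "x = pow (y, 2.0)" is rewritten as "x = y * y", and
   "x = pow (y, 0.5)" as a call to IFN_SQRT, provided the corresponding
   vector operations are supported by the target.  */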
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
					type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}
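/* A source-level example, summing "unsigned short" elements into an
   "unsigned int" accumulator:

       for (i = 0; i < n; i++)
	 sum += (unsigned int) a[i];

   which is replaced by WIDEN_SUM_EXPR when the target provides a
   widening-sum optab for the narrow input mode.  */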
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be
	 created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			  TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1
   are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */

static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 a case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  *type_out = get_vectype_for_scalar_type (use_type);
	  if (!*type_out)
	    return NULL;

	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_out = vectype;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_over_widening_pattern", stmts->last ());

  return pattern_stmt;
}
/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
   to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts, tree *type_out)
{
  return vect_recog_widen_op_pattern (stmts, type_out, LSHIFT_EXPR,
				      WIDEN_LSHIFT_EXPR, true,
				      "vect_recog_widen_shift_pattern");
}
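/* For example, with "short *a" and "int *b":

       for (i = 0; i < n; i++)
	 b[i] = (int) a[i] << 4;

   the shift can be done as WIDEN_LSHIFT_EXPR on the narrow inputs, assuming
   the target supports a widening shift (e.g. the NEON vshll instructions).  */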
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

	S1 d_t = -c_t;
	S2 e_t = d_t & (B - 1);
	S3 f_t = b_t << c_t;
	S4 g_t = b_t >> e_t;
	S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */

static gimple *
vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_out = vectype;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME)
    ext_def = vect_get_external_def_edge (vinfo, oprnd1);

  def = NULL_TREE;
  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  stype = TREE_TYPE (def);
  scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1892 if (TREE_CODE (def) == INTEGER_CST)
1894 if (!tree_fits_uhwi_p (def)
1895 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1896 || integer_zerop (def))
1897 return NULL;
1898 def2 = build_int_cst (stype,
1899 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1901 else
1903 tree vecstype = get_vectype_for_scalar_type (stype);
1904 stmt_vec_info def_stmt_vinfo;
1906 if (vecstype == NULL_TREE)
1907 return NULL;
1908 def2 = vect_recog_temp_ssa_var (stype, NULL);
1909 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1910 if (ext_def)
1912 basic_block new_bb
1913 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1914 gcc_assert (!new_bb);
1916 else
1918 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1919 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1920 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1921 append_pattern_def_seq (stmt_vinfo, def_stmt);
1924 def2 = vect_recog_temp_ssa_var (stype, NULL);
1925 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
1926 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1927 gimple_assign_lhs (def_stmt), mask);
1928 if (ext_def)
1930 basic_block new_bb
1931 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1932 gcc_assert (!new_bb);
1934 else
1936 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1937 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1938 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1939 append_pattern_def_seq (stmt_vinfo, def_stmt);
1943 var1 = vect_recog_temp_ssa_var (type, NULL);
1944 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1945 ? LSHIFT_EXPR : RSHIFT_EXPR,
1946 oprnd0, def);
1947 append_pattern_def_seq (stmt_vinfo, def_stmt);
1949 var2 = vect_recog_temp_ssa_var (type, NULL);
1950 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1951 ? RSHIFT_EXPR : LSHIFT_EXPR,
1952 oprnd0, def2);
1953 append_pattern_def_seq (stmt_vinfo, def_stmt);
1955 /* Pattern detected. */
1956 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
1958 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1959 var = vect_recog_temp_ssa_var (type, NULL);
1960 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1962 stmts->safe_push (last_stmt);
1963 return pattern_stmt;
1966 /* Detect a vector by vector shift pattern that wouldn't otherwise be
1967 vectorized:
1969 type a_t;
1970 TYPE b_T, res_T;
1972 S1 a_t = ;
1973 S2 b_T = ;
1974 S3 res_T = b_T op a_t;
1976 where type 'TYPE' is a type with a different size than 'type',
1977 and op is <<, >> or rotate.
1979 Also detect cases:
1981 type a_t;
1982 TYPE b_T, c_T, res_T;
1984 S0 c_T = ;
1985 S1 a_t = (type) c_T;
1986 S2 b_T = ;
1987 S3 res_T = b_T op a_t;
1989 Input/Output:
1991 * STMTS: Contains a stmt from which the pattern search begins,
1992 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1993 with a shift/rotate which has same type on both operands, in the
1994 second case just b_T op c_T, in the first case with added cast
1995 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1997 Output:
1999 * TYPE_OUT: The type of the output of this pattern.
2001 * Return value: A new stmt that will be used to replace the shift/rotate
2002 S3 stmt. */
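/* For illustration, a loop of roughly this shape (the names are only an
   example) matches the first form, because the shift amount has a
   narrower type than the shifted value:

     unsigned int b[256];
     unsigned char a[256];
     for (int i = 0; i < 256; i++)
       b[i] = b[i] << a[i];

   A cast of the shift amount to the type of the shifted operand is added
   to STMT_VINFO_PATTERN_DEF_SEQ so that both vector operands end up with
   the same type.  */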
2004 static gimple *
2005 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts, tree *type_out)
2007 gimple *last_stmt = stmts->pop ();
2008 tree oprnd0, oprnd1, lhs, var;
2009 gimple *pattern_stmt;
2010 enum tree_code rhs_code;
2011 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2012 vec_info *vinfo = stmt_vinfo->vinfo;
2014 if (!is_gimple_assign (last_stmt))
2015 return NULL;
2017 rhs_code = gimple_assign_rhs_code (last_stmt);
2018 switch (rhs_code)
2020 case LSHIFT_EXPR:
2021 case RSHIFT_EXPR:
2022 case LROTATE_EXPR:
2023 case RROTATE_EXPR:
2024 break;
2025 default:
2026 return NULL;
2029 lhs = gimple_assign_lhs (last_stmt);
2030 oprnd0 = gimple_assign_rhs1 (last_stmt);
2031 oprnd1 = gimple_assign_rhs2 (last_stmt);
2032 if (TREE_CODE (oprnd0) != SSA_NAME
2033 || TREE_CODE (oprnd1) != SSA_NAME
2034 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2035 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2036 || TYPE_PRECISION (TREE_TYPE (lhs))
2037 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2038 return NULL;
2040 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2041 if (!def_vinfo)
2042 return NULL;
2044 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2045 if (*type_out == NULL_TREE)
2046 return NULL;
2048 tree def = NULL_TREE;
2049 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2050 if (def_stmt && gimple_assign_cast_p (def_stmt))
2052 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2053 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2054 && TYPE_PRECISION (TREE_TYPE (rhs1))
2055 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2057 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2058 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2059 def = rhs1;
2060 else
2062 tree mask
2063 = build_low_bits_mask (TREE_TYPE (rhs1),
2064 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2065 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2066 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2067 stmt_vec_info new_stmt_info
2068 = new_stmt_vec_info (def_stmt, vinfo);
2069 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2070 STMT_VINFO_VECTYPE (new_stmt_info)
2071 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2072 new_pattern_def_seq (stmt_vinfo, def_stmt);
2077 if (def == NULL_TREE)
2079 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2080 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2081 new_pattern_def_seq (stmt_vinfo, def_stmt);
2084 /* Pattern detected. */
2085 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2087 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2088 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2089 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2091 stmts->safe_push (last_stmt);
2092 return pattern_stmt;
2095 /* Return true iff the target has a vector optab implementing the operation
2096 CODE on type VECTYPE. */
2098 static bool
2099 target_has_vecop_for_code (tree_code code, tree vectype)
2101 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2102 return voptab
2103 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2106 /* Verify that the target has optabs of VECTYPE to perform all the steps
2107 needed by the multiplication-by-immediate synthesis algorithm described by
2108 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2109 present. Return true iff the target supports all the steps. */
2111 static bool
2112 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2113 tree vectype, bool synth_shift_p)
2115 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2116 return false;
2118 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2119 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2121 if (var == negate_variant
2122 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2123 return false;
2125 /* If we must synthesize shifts with additions make sure that vector
2126 addition is available. */
2127 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2128 return false;
2130 for (int i = 1; i < alg->ops; i++)
2132 switch (alg->op[i])
2134 case alg_shift:
2135 break;
2136 case alg_add_t_m2:
2137 case alg_add_t2_m:
2138 case alg_add_factor:
2139 if (!supports_vplus)
2140 return false;
2141 break;
2142 case alg_sub_t_m2:
2143 case alg_sub_t2_m:
2144 case alg_sub_factor:
2145 if (!supports_vminus)
2146 return false;
2147 break;
2148 case alg_unknown:
2149 case alg_m:
2150 case alg_zero:
2151 case alg_impossible:
2152 return false;
2153 default:
2154 gcc_unreachable ();
2158 return true;
2161 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2162 putting the final result in DEST. Append all statements but the last into
2163 VINFO. Return the last statement. */
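/* For example, assuming vector addition is available, a shift by
   AMNT == 3 is emitted as three successive doublings:

     t1 = op + op;        op << 1
     t2 = t1 + t1;        op << 2
     dest = t2 + t2;      op << 3

   The first two statements go into the pattern def sequence and the last
   one is returned to the caller.  */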
2165 static gimple *
2166 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2167 stmt_vec_info vinfo)
2169 HOST_WIDE_INT i;
2170 tree itype = TREE_TYPE (op);
2171 tree prev_res = op;
2172 gcc_assert (amnt >= 0);
2173 for (i = 0; i < amnt; i++)
2175 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2176 : dest;
2177 gimple *stmt
2178 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2179 prev_res = tmp_var;
2180 if (i < amnt - 1)
2181 append_pattern_def_seq (vinfo, stmt);
2182 else
2183 return stmt;
2185 gcc_unreachable ();
2186 return NULL;
2189 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2190 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2191 the process if necessary. Append the resulting assignment statements
2192 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2193 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2194 left shifts using additions. */
2196 static tree
2197 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2198 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2200 if (integer_zerop (op2)
2201 && (code == LSHIFT_EXPR
2202 || code == PLUS_EXPR))
2204 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2205 return op1;
2208 gimple *stmt;
2209 tree itype = TREE_TYPE (op1);
2210 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2212 if (code == LSHIFT_EXPR
2213 && synth_shift_p)
2215 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2216 stmt_vinfo);
2217 append_pattern_def_seq (stmt_vinfo, stmt);
2218 return tmp_var;
2221 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2222 append_pattern_def_seq (stmt_vinfo, stmt);
2223 return tmp_var;
2226 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2227 and simple arithmetic operations to be vectorized. Record the statements
2228 produced in STMT_VINFO and return the last statement in the sequence or
2229 NULL if it's not possible to synthesize such a multiplication.
2230 This function mirrors the behavior of expand_mult_const in expmed.c but
2231 works on tree-ssa form. */
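/* For illustration, a multiplication such as x * 10 might be synthesized
   (the exact sequence depends on what choose_mult_variant picks) roughly
   as:

     t1 = x << 2;         x * 4
     t2 = t1 + x;         x * 5
     res = t2 << 1;       x * 10

   with the arithmetic done in an unsigned copy of the type when the
   original type does not wrap on overflow.  */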
2233 static gimple *
2234 vect_synth_mult_by_constant (tree op, tree val,
2235 stmt_vec_info stmt_vinfo)
2237 tree itype = TREE_TYPE (op);
2238 machine_mode mode = TYPE_MODE (itype);
2239 struct algorithm alg;
2240 mult_variant variant;
2241 if (!tree_fits_shwi_p (val))
2242 return NULL;
2244 /* Multiplication synthesis by shifts, adds and subs can introduce
2245 signed overflow where the original operation didn't. Perform the
2246 operations on an unsigned type and cast back to avoid this.
2247 In the future we may want to relax this for synthesis algorithms
2248 that we can prove do not cause unexpected overflow. */
2249 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2251 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2253 /* Targets that don't support vector shifts but support vector additions
2254 can synthesize shifts that way. */
2255 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2257 HOST_WIDE_INT hwval = tree_to_shwi (val);
2258 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2259 The vectorizer's benefit analysis will decide whether it's beneficial
2260 to do this. */
2261 bool possible = choose_mult_variant (mode, hwval, &alg,
2262 &variant, MAX_COST);
2263 if (!possible)
2264 return NULL;
2266 tree vectype = get_vectype_for_scalar_type (multtype);
2268 if (!vectype
2269 || !target_supports_mult_synth_alg (&alg, variant,
2270 vectype, synth_shift_p))
2271 return NULL;
2273 tree accumulator;
2275 /* Clear out the sequence of statements so we can populate it below. */
2276 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2277 gimple *stmt = NULL;
2279 if (cast_to_unsigned_p)
2281 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2282 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2283 append_pattern_def_seq (stmt_vinfo, stmt);
2284 op = tmp_op;
2287 if (alg.op[0] == alg_zero)
2288 accumulator = build_int_cst (multtype, 0);
2289 else
2290 accumulator = op;
2292 bool needs_fixup = (variant == negate_variant)
2293 || (variant == add_variant);
2295 for (int i = 1; i < alg.ops; i++)
2297 tree shft_log = build_int_cst (multtype, alg.log[i]);
2298 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2299 tree tmp_var = NULL_TREE;
2301 switch (alg.op[i])
2303 case alg_shift:
2304 if (synth_shift_p)
2305 stmt
2306 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2307 stmt_vinfo);
2308 else
2309 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2310 shft_log);
2311 break;
2312 case alg_add_t_m2:
2313 tmp_var
2314 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2315 stmt_vinfo, synth_shift_p);
2316 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2317 tmp_var);
2318 break;
2319 case alg_sub_t_m2:
2320 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2321 shft_log, stmt_vinfo,
2322 synth_shift_p);
2323 /* In some algorithms the first step involves zeroing the
2324 accumulator. If subtracting from such an accumulator
2325 just emit the negation directly. */
2326 if (integer_zerop (accumulator))
2327 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2328 else
2329 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2330 tmp_var);
2331 break;
2332 case alg_add_t2_m:
2333 tmp_var
2334 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2335 stmt_vinfo, synth_shift_p);
2336 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2337 break;
2338 case alg_sub_t2_m:
2339 tmp_var
2340 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2341 stmt_vinfo, synth_shift_p);
2342 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2343 break;
2344 case alg_add_factor:
2345 tmp_var
2346 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2347 stmt_vinfo, synth_shift_p);
2348 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2349 tmp_var);
2350 break;
2351 case alg_sub_factor:
2352 tmp_var
2353 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2354 stmt_vinfo, synth_shift_p);
2355 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2356 accumulator);
2357 break;
2358 default:
2359 gcc_unreachable ();
2361 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2362 but rather return it directly. */
2364 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2365 append_pattern_def_seq (stmt_vinfo, stmt);
2366 accumulator = accum_tmp;
2368 if (variant == negate_variant)
2370 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2371 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2372 accumulator = accum_tmp;
2373 if (cast_to_unsigned_p)
2374 append_pattern_def_seq (stmt_vinfo, stmt);
2376 else if (variant == add_variant)
2378 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2379 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2380 accumulator = accum_tmp;
2381 if (cast_to_unsigned_p)
2382 append_pattern_def_seq (stmt_vinfo, stmt);
2384 /* Move back to a signed if needed. */
2385 if (cast_to_unsigned_p)
2387 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2388 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2391 return stmt;
2394 /* Detect multiplication by constant and convert it into a sequence of
2395 shifts and additions, subtractions, negations. We reuse the
2396 choose_mult_variant algorithms from expmed.c
2398 Input/Output:
2400 STMTS: Contains a stmt from which the pattern search begins,
2401 i.e. the mult stmt.
2403 Output:
2405 * TYPE_OUT: The type of the output of this pattern.
2407 * Return value: A new stmt that will be used to replace
2408 the multiplication. */
2410 static gimple *
2411 vect_recog_mult_pattern (vec<gimple *> *stmts, tree *type_out)
2413 gimple *last_stmt = stmts->pop ();
2414 tree oprnd0, oprnd1, vectype, itype;
2415 gimple *pattern_stmt;
2416 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2418 if (!is_gimple_assign (last_stmt))
2419 return NULL;
2421 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2422 return NULL;
2424 oprnd0 = gimple_assign_rhs1 (last_stmt);
2425 oprnd1 = gimple_assign_rhs2 (last_stmt);
2426 itype = TREE_TYPE (oprnd0);
2428 if (TREE_CODE (oprnd0) != SSA_NAME
2429 || TREE_CODE (oprnd1) != INTEGER_CST
2430 || !INTEGRAL_TYPE_P (itype)
2431 || !type_has_mode_precision_p (itype))
2432 return NULL;
2434 vectype = get_vectype_for_scalar_type (itype);
2435 if (vectype == NULL_TREE)
2436 return NULL;
2438 /* If the target can handle vectorized multiplication natively,
2439 don't attempt to optimize this. */
2440 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2441 if (mul_optab != unknown_optab)
2443 machine_mode vec_mode = TYPE_MODE (vectype);
2444 int icode = (int) optab_handler (mul_optab, vec_mode);
2445 if (icode != CODE_FOR_nothing)
2446 return NULL;
2449 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2450 if (!pattern_stmt)
2451 return NULL;
2453 /* Pattern detected. */
2454 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2456 stmts->safe_push (last_stmt);
2457 *type_out = vectype;
2459 return pattern_stmt;
2462 /* Detect a signed division by a constant that wouldn't be
2463 otherwise vectorized:
2465 type a_t, b_t;
2467 S1 a_t = b_t / N;
2469 where type 'type' is an integral type and N is a constant.
2471 Similarly handle modulo by a constant:
2473 S4 a_t = b_t % N;
2475 Input/Output:
2477 * STMTS: Contains a stmt from which the pattern search begins,
2478 i.e. the division stmt. If N is a power of two constant and
2479 type is signed, S1 is replaced by:
2480 S3 y_t = b_t < 0 ? N - 1 : 0;
2481 S2 x_t = b_t + y_t;
2482 S1' a_t = x_t >> log2 (N);
2484 If N is a power of two constant and type is signed, S4 is
2485 replaced by (where *_T temporaries have unsigned type):
2486 S9 y_T = b_t < 0 ? -1U : 0U;
2487 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2488 S7 z_t = (type) z_T;
2489 S6 w_t = b_t + z_t;
2490 S5 x_t = w_t & (N - 1);
2491 S4' a_t = x_t - z_t;
2493 Output:
2495 * TYPE_OUT: The type of the output of this pattern.
2497 * Return value: A new stmt that will be used to replace the division
2498 S1 or modulo S4 stmt. */
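/* For illustration, with N == 8 and a signed 32-bit type the division S1
   becomes roughly:

     y_t = b_t < 0 ? 7 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 3;

   which rounds towards zero for negative b_t, matching the semantics of
   signed integer division.  */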
2500 static gimple *
2501 vect_recog_divmod_pattern (vec<gimple *> *stmts, tree *type_out)
2503 gimple *last_stmt = stmts->pop ();
2504 tree oprnd0, oprnd1, vectype, itype, cond;
2505 gimple *pattern_stmt, *def_stmt;
2506 enum tree_code rhs_code;
2507 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2508 vec_info *vinfo = stmt_vinfo->vinfo;
2509 optab optab;
2510 tree q;
2511 int dummy_int, prec;
2512 stmt_vec_info def_stmt_vinfo;
2514 if (!is_gimple_assign (last_stmt))
2515 return NULL;
2517 rhs_code = gimple_assign_rhs_code (last_stmt);
2518 switch (rhs_code)
2520 case TRUNC_DIV_EXPR:
2521 case TRUNC_MOD_EXPR:
2522 break;
2523 default:
2524 return NULL;
2527 oprnd0 = gimple_assign_rhs1 (last_stmt);
2528 oprnd1 = gimple_assign_rhs2 (last_stmt);
2529 itype = TREE_TYPE (oprnd0);
2530 if (TREE_CODE (oprnd0) != SSA_NAME
2531 || TREE_CODE (oprnd1) != INTEGER_CST
2532 || TREE_CODE (itype) != INTEGER_TYPE
2533 || !type_has_mode_precision_p (itype))
2534 return NULL;
2536 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2537 vectype = get_vectype_for_scalar_type (itype);
2538 if (vectype == NULL_TREE)
2539 return NULL;
2541 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2543 /* If the target can handle vectorized division or modulo natively,
2544 don't attempt to optimize this, since native division is likely
2545 to give smaller code. */
2546 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2547 if (optab != unknown_optab)
2549 machine_mode vec_mode = TYPE_MODE (vectype);
2550 int icode = (int) optab_handler (optab, vec_mode);
2551 if (icode != CODE_FOR_nothing)
2552 return NULL;
2556 prec = TYPE_PRECISION (itype);
2557 if (integer_pow2p (oprnd1))
2559 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2560 return NULL;
2562 /* Pattern detected. */
2563 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2565 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2566 build_int_cst (itype, 0));
2567 if (rhs_code == TRUNC_DIV_EXPR)
2569 tree var = vect_recog_temp_ssa_var (itype, NULL);
2570 tree shift;
2571 def_stmt
2572 = gimple_build_assign (var, COND_EXPR, cond,
2573 fold_build2 (MINUS_EXPR, itype, oprnd1,
2574 build_int_cst (itype, 1)),
2575 build_int_cst (itype, 0));
2576 new_pattern_def_seq (stmt_vinfo, def_stmt);
2577 var = vect_recog_temp_ssa_var (itype, NULL);
2578 def_stmt
2579 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2580 gimple_assign_lhs (def_stmt));
2581 append_pattern_def_seq (stmt_vinfo, def_stmt);
2583 shift = build_int_cst (itype, tree_log2 (oprnd1));
2584 pattern_stmt
2585 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2586 RSHIFT_EXPR, var, shift);
2588 else
2590 tree signmask;
2591 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2592 if (compare_tree_int (oprnd1, 2) == 0)
2594 signmask = vect_recog_temp_ssa_var (itype, NULL);
2595 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2596 build_int_cst (itype, 1),
2597 build_int_cst (itype, 0));
2598 append_pattern_def_seq (stmt_vinfo, def_stmt);
2600 else
2602 tree utype
2603 = build_nonstandard_integer_type (prec, 1);
2604 tree vecutype = get_vectype_for_scalar_type (utype);
2605 tree shift
2606 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2607 - tree_log2 (oprnd1));
2608 tree var = vect_recog_temp_ssa_var (utype, NULL);
2610 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2611 build_int_cst (utype, -1),
2612 build_int_cst (utype, 0));
2613 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2614 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2615 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2616 append_pattern_def_seq (stmt_vinfo, def_stmt);
2617 var = vect_recog_temp_ssa_var (utype, NULL);
2618 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2619 gimple_assign_lhs (def_stmt),
2620 shift);
2621 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2622 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2623 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2624 append_pattern_def_seq (stmt_vinfo, def_stmt);
2625 signmask = vect_recog_temp_ssa_var (itype, NULL);
2626 def_stmt
2627 = gimple_build_assign (signmask, NOP_EXPR, var);
2628 append_pattern_def_seq (stmt_vinfo, def_stmt);
2630 def_stmt
2631 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2632 PLUS_EXPR, oprnd0, signmask);
2633 append_pattern_def_seq (stmt_vinfo, def_stmt);
2634 def_stmt
2635 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2636 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2637 fold_build2 (MINUS_EXPR, itype, oprnd1,
2638 build_int_cst (itype, 1)));
2639 append_pattern_def_seq (stmt_vinfo, def_stmt);
2641 pattern_stmt
2642 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2643 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2644 signmask);
2647 stmts->safe_push (last_stmt);
2649 *type_out = vectype;
2650 return pattern_stmt;
2653 if (prec > HOST_BITS_PER_WIDE_INT
2654 || integer_zerop (oprnd1))
2655 return NULL;
2657 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2658 return NULL;
2660 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2662 if (TYPE_UNSIGNED (itype))
2664 unsigned HOST_WIDE_INT mh, ml;
2665 int pre_shift, post_shift;
2666 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2667 & GET_MODE_MASK (itype_mode));
2668 tree t1, t2, t3, t4;
2670 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2671 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2672 return NULL;
2674 /* Find a suitable multiplier and right shift count
2675 instead of multiplying with D. */
2676 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2678 /* If the suggested multiplier is more than SIZE bits, we can do better
2679 for even divisors, using an initial right shift. */
2680 if (mh != 0 && (d & 1) == 0)
2682 pre_shift = ctz_or_zero (d);
2683 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2684 &ml, &post_shift, &dummy_int);
2685 gcc_assert (!mh);
2687 else
2688 pre_shift = 0;
2690 if (mh != 0)
2692 if (post_shift - 1 >= prec)
2693 return NULL;
2695 /* t1 = oprnd0 h* ml;
2696 t2 = oprnd0 - t1;
2697 t3 = t2 >> 1;
2698 t4 = t1 + t3;
2699 q = t4 >> (post_shift - 1); */
2700 t1 = vect_recog_temp_ssa_var (itype, NULL);
2701 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2702 build_int_cst (itype, ml));
2703 append_pattern_def_seq (stmt_vinfo, def_stmt);
2705 t2 = vect_recog_temp_ssa_var (itype, NULL);
2706 def_stmt
2707 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2708 append_pattern_def_seq (stmt_vinfo, def_stmt);
2710 t3 = vect_recog_temp_ssa_var (itype, NULL);
2711 def_stmt
2712 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2713 append_pattern_def_seq (stmt_vinfo, def_stmt);
2715 t4 = vect_recog_temp_ssa_var (itype, NULL);
2716 def_stmt
2717 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2719 if (post_shift != 1)
2721 append_pattern_def_seq (stmt_vinfo, def_stmt);
2723 q = vect_recog_temp_ssa_var (itype, NULL);
2724 pattern_stmt
2725 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2726 build_int_cst (itype, post_shift - 1));
2728 else
2730 q = t4;
2731 pattern_stmt = def_stmt;
2734 else
2736 if (pre_shift >= prec || post_shift >= prec)
2737 return NULL;
2739 /* t1 = oprnd0 >> pre_shift;
2740 t2 = t1 h* ml;
2741 q = t2 >> post_shift; */
2742 if (pre_shift)
2744 t1 = vect_recog_temp_ssa_var (itype, NULL);
2745 def_stmt
2746 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2747 build_int_cst (NULL, pre_shift));
2748 append_pattern_def_seq (stmt_vinfo, def_stmt);
2750 else
2751 t1 = oprnd0;
2753 t2 = vect_recog_temp_ssa_var (itype, NULL);
2754 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2755 build_int_cst (itype, ml));
2757 if (post_shift)
2759 append_pattern_def_seq (stmt_vinfo, def_stmt);
2761 q = vect_recog_temp_ssa_var (itype, NULL);
2762 def_stmt
2763 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2764 build_int_cst (itype, post_shift));
2766 else
2767 q = t2;
2769 pattern_stmt = def_stmt;
2772 else
2774 unsigned HOST_WIDE_INT ml;
2775 int post_shift;
2776 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2777 unsigned HOST_WIDE_INT abs_d;
2778 bool add = false;
2779 tree t1, t2, t3, t4;
2781 /* Give up for -1. */
2782 if (d == -1)
2783 return NULL;
2785 /* Since d might be INT_MIN, we have to cast to
2786 unsigned HOST_WIDE_INT before negating to avoid
2787 undefined signed overflow. */
2788 abs_d = (d >= 0
2789 ? (unsigned HOST_WIDE_INT) d
2790 : - (unsigned HOST_WIDE_INT) d);
2792 /* n rem d = n rem -d */
2793 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2795 d = abs_d;
2796 oprnd1 = build_int_cst (itype, abs_d);
2798 else if (HOST_BITS_PER_WIDE_INT >= prec
2799 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2800 /* This case is not handled correctly below. */
2801 return NULL;
2803 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2804 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2806 add = true;
2807 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2809 if (post_shift >= prec)
2810 return NULL;
2812 /* t1 = oprnd0 h* ml; */
2813 t1 = vect_recog_temp_ssa_var (itype, NULL);
2814 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2815 build_int_cst (itype, ml));
2817 if (add)
2819 /* t2 = t1 + oprnd0; */
2820 append_pattern_def_seq (stmt_vinfo, def_stmt);
2821 t2 = vect_recog_temp_ssa_var (itype, NULL);
2822 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2824 else
2825 t2 = t1;
2827 if (post_shift)
2829 /* t3 = t2 >> post_shift; */
2830 append_pattern_def_seq (stmt_vinfo, def_stmt);
2831 t3 = vect_recog_temp_ssa_var (itype, NULL);
2832 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2833 build_int_cst (itype, post_shift));
2835 else
2836 t3 = t2;
2838 wide_int oprnd0_min, oprnd0_max;
2839 int msb = 1;
2840 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2842 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2843 msb = 0;
2844 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2845 msb = -1;
2848 if (msb == 0 && d >= 0)
2850 /* q = t3; */
2851 q = t3;
2852 pattern_stmt = def_stmt;
2854 else
2856 /* t4 = oprnd0 >> (prec - 1);
2857 or if we know from VRP that oprnd0 >= 0
2858 t4 = 0;
2859 or if we know from VRP that oprnd0 < 0
2860 t4 = -1; */
2861 append_pattern_def_seq (stmt_vinfo, def_stmt);
2862 t4 = vect_recog_temp_ssa_var (itype, NULL);
2863 if (msb != 1)
2864 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2865 build_int_cst (itype, msb));
2866 else
2867 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2868 build_int_cst (itype, prec - 1));
2869 append_pattern_def_seq (stmt_vinfo, def_stmt);
2871 /* q = t3 - t4; or q = t4 - t3; */
2872 q = vect_recog_temp_ssa_var (itype, NULL);
2873 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2874 d < 0 ? t3 : t4);
2878 if (rhs_code == TRUNC_MOD_EXPR)
2880 tree r, t1;
2882 /* We divided. Now finish by:
2883 t1 = q * oprnd1;
2884 r = oprnd0 - t1; */
2885 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2887 t1 = vect_recog_temp_ssa_var (itype, NULL);
2888 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2889 append_pattern_def_seq (stmt_vinfo, def_stmt);
2891 r = vect_recog_temp_ssa_var (itype, NULL);
2892 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2895 /* Pattern detected. */
2896 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2898 stmts->safe_push (last_stmt);
2900 *type_out = vectype;
2901 return pattern_stmt;
2904 /* Function vect_recog_mixed_size_cond_pattern
2906 Try to find the following pattern:
2908 type x_t, y_t;
2909 TYPE a_T, b_T, c_T;
2910 loop:
2911 S1 a_T = x_t CMP y_t ? b_T : c_T;
2913 where type 'TYPE' is an integral type which has different size
2914 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2915 than 'type', the constants need to fit into an integer type
2916 with the same width as 'type') or results of conversion from 'type'.
2918 Input:
2920 * LAST_STMT: A stmt from which the pattern search begins.
2922 Output:
2924 * TYPE_OUT: The type of the output of this pattern.
2926 * Return value: A new stmt that will be used to replace the pattern.
2927 Additionally a def_stmt is added.
2929 a_it = x_t CMP y_t ? b_it : c_it;
2930 a_T = (TYPE) a_it; */
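/* For illustration, a loop of roughly this shape (the names are only an
   example) is a candidate, because the comparison is done on 32-bit
   values while the result is 8 bits wide:

     int x[256], y[256];
     signed char r[256];
     for (int i = 0; i < 256; i++)
       r[i] = x[i] < y[i] ? 2 : 3;

   The COND_EXPR is first evaluated in a 32-bit integer type matching the
   comparison and its result is then narrowed to the 8-bit type.  */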
2932 static gimple *
2933 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_out)
2935 gimple *last_stmt = (*stmts)[0];
2936 tree cond_expr, then_clause, else_clause;
2937 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2938 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2939 gimple *pattern_stmt, *def_stmt;
2940 vec_info *vinfo = stmt_vinfo->vinfo;
2941 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2942 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2943 bool promotion;
2944 tree comp_scalar_type;
2946 if (!is_gimple_assign (last_stmt)
2947 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2948 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2949 return NULL;
2951 cond_expr = gimple_assign_rhs1 (last_stmt);
2952 then_clause = gimple_assign_rhs2 (last_stmt);
2953 else_clause = gimple_assign_rhs3 (last_stmt);
2955 if (!COMPARISON_CLASS_P (cond_expr))
2956 return NULL;
2958 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2959 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2960 if (comp_vectype == NULL_TREE)
2961 return NULL;
2963 type = gimple_expr_type (last_stmt);
2964 if (types_compatible_p (type, comp_scalar_type)
2965 || ((TREE_CODE (then_clause) != INTEGER_CST
2966 || TREE_CODE (else_clause) != INTEGER_CST)
2967 && !INTEGRAL_TYPE_P (comp_scalar_type))
2968 || !INTEGRAL_TYPE_P (type))
2969 return NULL;
2971 if ((TREE_CODE (then_clause) != INTEGER_CST
2972 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2973 &def_stmt0, &promotion))
2974 || (TREE_CODE (else_clause) != INTEGER_CST
2975 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2976 &def_stmt1, &promotion)))
2977 return NULL;
2979 if (orig_type0 && orig_type1
2980 && !types_compatible_p (orig_type0, orig_type1))
2981 return NULL;
2983 if (orig_type0)
2985 if (!types_compatible_p (orig_type0, comp_scalar_type))
2986 return NULL;
2987 then_clause = gimple_assign_rhs1 (def_stmt0);
2988 itype = orig_type0;
2991 if (orig_type1)
2993 if (!types_compatible_p (orig_type1, comp_scalar_type))
2994 return NULL;
2995 else_clause = gimple_assign_rhs1 (def_stmt1);
2996 itype = orig_type1;
3000 HOST_WIDE_INT cmp_mode_size
3001 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3003 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3004 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3005 return NULL;
3007 vectype = get_vectype_for_scalar_type (type);
3008 if (vectype == NULL_TREE)
3009 return NULL;
3011 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3012 return NULL;
3014 if (itype == NULL_TREE)
3015 itype = build_nonstandard_integer_type (cmp_mode_size,
3016 TYPE_UNSIGNED (type));
3018 if (itype == NULL_TREE
3019 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3020 return NULL;
3022 vecitype = get_vectype_for_scalar_type (itype);
3023 if (vecitype == NULL_TREE)
3024 return NULL;
3026 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3027 return NULL;
3029 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3031 if ((TREE_CODE (then_clause) == INTEGER_CST
3032 && !int_fits_type_p (then_clause, itype))
3033 || (TREE_CODE (else_clause) == INTEGER_CST
3034 && !int_fits_type_p (else_clause, itype)))
3035 return NULL;
3038 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3039 COND_EXPR, unshare_expr (cond_expr),
3040 fold_convert (itype, then_clause),
3041 fold_convert (itype, else_clause));
3042 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3043 NOP_EXPR, gimple_assign_lhs (def_stmt));
3045 new_pattern_def_seq (stmt_vinfo, def_stmt);
3046 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3047 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3048 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3049 *type_out = vectype;
3051 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3053 return pattern_stmt;
3057 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3058 true if bool VAR can and should be optimized that way. Assume it shouldn't
3059 in case it's a result of a comparison which can be directly vectorized into
3060 a vector comparison. Fills in STMTS with all stmts visited during the
3061 walk. */
3063 static bool
3064 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3066 tree rhs1;
3067 enum tree_code rhs_code;
3069 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3070 if (!def_stmt_info)
3071 return false;
3073 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3074 if (!def_stmt)
3075 return false;
3077 if (stmts.contains (def_stmt))
3078 return true;
3080 rhs1 = gimple_assign_rhs1 (def_stmt);
3081 rhs_code = gimple_assign_rhs_code (def_stmt);
3082 switch (rhs_code)
3084 case SSA_NAME:
3085 if (! check_bool_pattern (rhs1, vinfo, stmts))
3086 return false;
3087 break;
3089 CASE_CONVERT:
3090 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3091 return false;
3092 if (! check_bool_pattern (rhs1, vinfo, stmts))
3093 return false;
3094 break;
3096 case BIT_NOT_EXPR:
3097 if (! check_bool_pattern (rhs1, vinfo, stmts))
3098 return false;
3099 break;
3101 case BIT_AND_EXPR:
3102 case BIT_IOR_EXPR:
3103 case BIT_XOR_EXPR:
3104 if (! check_bool_pattern (rhs1, vinfo, stmts)
3105 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3106 return false;
3107 break;
3109 default:
3110 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3112 tree vecitype, comp_vectype;
3114 /* If the comparison can throw, then is_gimple_condexpr will be
3115 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3116 if (stmt_could_throw_p (def_stmt))
3117 return false;
3119 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3120 if (comp_vectype == NULL_TREE)
3121 return false;
3123 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3124 if (mask_type
3125 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3126 return false;
3128 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3130 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3131 tree itype
3132 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3133 vecitype = get_vectype_for_scalar_type (itype);
3134 if (vecitype == NULL_TREE)
3135 return false;
3137 else
3138 vecitype = comp_vectype;
3139 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3140 return false;
3142 else
3143 return false;
3144 break;
3147 bool res = stmts.add (def_stmt);
3148 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3149 gcc_assert (!res);
3151 return true;
3155 /* Helper function of adjust_bool_pattern. Build a cast of VAR to TYPE
3156 as a new statement, append that statement to STMT_INFO's pattern
3157 definition sequence and return the result of the cast. */
3159 static tree
3160 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3162 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3163 NOP_EXPR, var);
3164 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3165 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3166 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3167 append_pattern_def_seq (stmt_info, cast_stmt);
3168 return gimple_assign_lhs (cast_stmt);
3171 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3172 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3173 type, OUT_TYPE is the desired final integer type of the whole pattern.
3174 STMT_INFO is the info of the pattern root and is what pattern stmts
3175 should be associated with. DEFS is a map of pattern defs. */
3177 static void
3178 adjust_bool_pattern (tree var, tree out_type,
3179 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3181 gimple *stmt = SSA_NAME_DEF_STMT (var);
3182 enum tree_code rhs_code, def_rhs_code;
3183 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3184 location_t loc;
3185 gimple *pattern_stmt, *def_stmt;
3186 tree trueval = NULL_TREE;
3188 rhs1 = gimple_assign_rhs1 (stmt);
3189 rhs2 = gimple_assign_rhs2 (stmt);
3190 rhs_code = gimple_assign_rhs_code (stmt);
3191 loc = gimple_location (stmt);
3192 switch (rhs_code)
3194 case SSA_NAME:
3195 CASE_CONVERT:
3196 irhs1 = *defs.get (rhs1);
3197 itype = TREE_TYPE (irhs1);
3198 pattern_stmt
3199 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3200 SSA_NAME, irhs1);
3201 break;
3203 case BIT_NOT_EXPR:
3204 irhs1 = *defs.get (rhs1);
3205 itype = TREE_TYPE (irhs1);
3206 pattern_stmt
3207 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3208 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3209 break;
3211 case BIT_AND_EXPR:
3212 /* Try to optimize x = y & (a < b ? 1 : 0); into
3213 x = (a < b ? y : 0);
3215 E.g. for:
3216 bool a_b, b_b, c_b;
3217 TYPE d_T;
3219 S1 a_b = x1 CMP1 y1;
3220 S2 b_b = x2 CMP2 y2;
3221 S3 c_b = a_b & b_b;
3222 S4 d_T = (TYPE) c_b;
3224 we would normally emit:
3226 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3227 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3228 S3' c_T = a_T & b_T;
3229 S4' d_T = c_T;
3231 but we can save one stmt by using the
3232 result of one of the COND_EXPRs in the other COND_EXPR and leave
3233 the BIT_AND_EXPR stmt out:
3235 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3236 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3237 S4' f_T = c_T;
3239 At least when VEC_COND_EXPR is implemented using masks
3240 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3241 computes the comparison mask and ANDs it, in one case with an
3242 all-ones vector, in the other case with a vector register.
3243 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3244 often more expensive. */
3245 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3246 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3247 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3249 irhs1 = *defs.get (rhs1);
3250 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3251 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3252 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3254 rhs_code = def_rhs_code;
3255 rhs1 = def_rhs1;
3256 rhs2 = gimple_assign_rhs2 (def_stmt);
3257 trueval = irhs1;
3258 goto do_compare;
3260 else
3261 irhs2 = *defs.get (rhs2);
3262 goto and_ior_xor;
3264 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3265 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3266 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3268 irhs2 = *defs.get (rhs2);
3269 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3270 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3271 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3273 rhs_code = def_rhs_code;
3274 rhs1 = def_rhs1;
3275 rhs2 = gimple_assign_rhs2 (def_stmt);
3276 trueval = irhs2;
3277 goto do_compare;
3279 else
3280 irhs1 = *defs.get (rhs1);
3281 goto and_ior_xor;
3283 /* FALLTHRU */
3284 case BIT_IOR_EXPR:
3285 case BIT_XOR_EXPR:
3286 irhs1 = *defs.get (rhs1);
3287 irhs2 = *defs.get (rhs2);
3288 and_ior_xor:
3289 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3290 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3292 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3293 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3294 int out_prec = TYPE_PRECISION (out_type);
3295 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3296 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3297 stmt_info);
3298 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3299 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3300 stmt_info);
3301 else
3303 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3304 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3307 itype = TREE_TYPE (irhs1);
3308 pattern_stmt
3309 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3310 rhs_code, irhs1, irhs2);
3311 break;
3313 default:
3314 do_compare:
3315 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3316 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3317 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3318 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3319 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3321 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3322 itype
3323 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3325 else
3326 itype = TREE_TYPE (rhs1);
3327 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3328 if (trueval == NULL_TREE)
3329 trueval = build_int_cst (itype, 1);
3330 else
3331 gcc_checking_assert (useless_type_conversion_p (itype,
3332 TREE_TYPE (trueval)));
3333 pattern_stmt
3334 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3335 COND_EXPR, cond_expr, trueval,
3336 build_int_cst (itype, 0));
3337 break;
3340 gimple_set_location (pattern_stmt, loc);
3341 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3342 pattern def seq stmts instead of just letting auto-detection do
3343 its work? */
3344 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3345 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3346 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3347 append_pattern_def_seq (stmt_info, pattern_stmt);
3348 defs.put (var, gimple_assign_lhs (pattern_stmt));
3351 /* Comparison function to qsort a vector of gimple stmts after UID. */
3353 static int
3354 sort_after_uid (const void *p1, const void *p2)
3356 const gimple *stmt1 = *(const gimple * const *)p1;
3357 const gimple *stmt2 = *(const gimple * const *)p2;
3358 return gimple_uid (stmt1) - gimple_uid (stmt2);
3361 /* Create pattern stmts for all stmts participating in the bool pattern
3362 specified by BOOL_STMT_SET and its root STMT with the desired type
3363 OUT_TYPE. Return the def of the pattern root. */
3365 static tree
3366 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3367 tree out_type, gimple *stmt)
3369 /* Gather original stmts in the bool pattern in their order of appearance
3370 in the IL. */
3371 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3372 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3373 i != bool_stmt_set.end (); ++i)
3374 bool_stmts.quick_push (*i);
3375 bool_stmts.qsort (sort_after_uid);
3377 /* Now process them in that order, producing pattern stmts. */
3378 hash_map <tree, tree> defs;
3379 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3380 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3381 out_type, vinfo_for_stmt (stmt), defs);
3383 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3384 gimple *pattern_stmt
3385 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3386 return gimple_assign_lhs (pattern_stmt);
3389 /* Helper for search_type_for_mask. */
3391 static tree
3392 search_type_for_mask_1 (tree var, vec_info *vinfo,
3393 hash_map<gimple *, tree> &cache)
3395 tree rhs1;
3396 enum tree_code rhs_code;
3397 tree res = NULL_TREE, res2;
3399 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3400 return NULL_TREE;
3402 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3403 if (!def_stmt_info)
3404 return NULL_TREE;
3406 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3407 if (!def_stmt)
3408 return NULL_TREE;
3410 tree *c = cache.get (def_stmt);
3411 if (c)
3412 return *c;
3414 rhs_code = gimple_assign_rhs_code (def_stmt);
3415 rhs1 = gimple_assign_rhs1 (def_stmt);
3417 switch (rhs_code)
3419 case SSA_NAME:
3420 case BIT_NOT_EXPR:
3421 CASE_CONVERT:
3422 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3423 break;
3425 case BIT_AND_EXPR:
3426 case BIT_IOR_EXPR:
3427 case BIT_XOR_EXPR:
3428 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3429 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3430 cache);
3431 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3432 res = res2;
3433 break;
3435 default:
3436 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3438 tree comp_vectype, mask_type;
3440 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3442 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3443 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3444 vinfo, cache);
3445 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3446 res = res2;
3447 break;
3450 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3451 if (comp_vectype == NULL_TREE)
3453 res = NULL_TREE;
3454 break;
3457 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3458 if (!mask_type
3459 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3461 res = NULL_TREE;
3462 break;
3465 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3466 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3468 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3469 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3471 else
3472 res = TREE_TYPE (rhs1);
3476 cache.put (def_stmt, res);
3477 return res;
3480 /* Return the proper type for converting bool VAR into
3481 an integer value or NULL_TREE if no such type exists.
3482 The type is chosen so that the converted value has the
3483 same number of elements as VAR's vector type. */
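/* For example, if VAR is ultimately defined by a comparison of two
   32-bit integers, the returned type would be a 32-bit unsigned integer
   type, so that a vector of converted values has as many elements as a
   vector of the compared operands.  */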
3485 static tree
3486 search_type_for_mask (tree var, vec_info *vinfo)
3488 hash_map<gimple *, tree> cache;
3489 return search_type_for_mask_1 (var, vinfo, cache);
3492 /* Function vect_recog_bool_pattern
3494 Try to find pattern like following:
3496 bool a_b, b_b, c_b, d_b, e_b;
3497 TYPE f_T;
3498 loop:
3499 S1 a_b = x1 CMP1 y1;
3500 S2 b_b = x2 CMP2 y2;
3501 S3 c_b = a_b & b_b;
3502 S4 d_b = x3 CMP3 y3;
3503 S5 e_b = c_b | d_b;
3504 S6 f_T = (TYPE) e_b;
3506 where type 'TYPE' is an integral type. Or a similar pattern
3507 ending in
3509 S6 f_Y = e_b ? r_Y : s_Y;
3511 such as results from if-conversion of a complex condition.
3513 Input:
3515 * LAST_STMT: A stmt at the end from which the pattern
3516 search begins, i.e. cast of a bool to
3517 an integer type.
3519 Output:
3521 * TYPE_OUT: The type of the output of this pattern.
3523 * Return value: A new stmt that will be used to replace the pattern.
3525 Assuming the size of TYPE is the same as the size of all comparisons
3526 (otherwise some casts would be added where needed), for the above
3527 sequence we create related pattern stmts:
3528 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3529 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3530 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3531 S5' e_T = c_T | d_T;
3532 S6' f_T = e_T;
3534 Instead of the above S3' we could emit:
3535 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3536 S3' c_T = a_T | b_T;
3537 but the above is more efficient. */
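/* For illustration, the first form above corresponds to source code of
   roughly this shape (the names are only an example), once if-conversion
   has turned the short-circuit operators into bitwise ones:

     int f[256];
     int x1[256], y1[256], x2[256], y2[256], x3[256], y3[256];
     for (int i = 0; i < 256; i++)
       f[i] = (x1[i] < y1[i] && x2[i] < y2[i]) || x3[i] < y3[i];  */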
3539 static gimple *
3540 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_out)
3542 gimple *last_stmt = stmts->pop ();
3543 enum tree_code rhs_code;
3544 tree var, lhs, rhs, vectype;
3545 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3546 stmt_vec_info new_stmt_info;
3547 vec_info *vinfo = stmt_vinfo->vinfo;
3548 gimple *pattern_stmt;
3550 if (!is_gimple_assign (last_stmt))
3551 return NULL;
3553 var = gimple_assign_rhs1 (last_stmt);
3554 lhs = gimple_assign_lhs (last_stmt);
3556 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3557 return NULL;
3559 hash_set<gimple *> bool_stmts;
3561 rhs_code = gimple_assign_rhs_code (last_stmt);
3562 if (CONVERT_EXPR_CODE_P (rhs_code))
3564 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3565 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3566 return NULL;
3567 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3568 if (vectype == NULL_TREE)
3569 return NULL;
3571 if (check_bool_pattern (var, vinfo, bool_stmts))
3573 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3574 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3575 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3576 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3577 else
3578 pattern_stmt
3579 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3581 else
3583 tree type = search_type_for_mask (var, vinfo);
3584 tree cst0, cst1, tmp;
3586 if (!type)
3587 return NULL;
3589 /* We may directly use the cond with a narrowed type, avoiding
3590 multiple cond exprs with subsequent result packing, and instead
3591 perform a single cond with a packed mask. In case of widening
3592 it is better to create the cond first and then extract the
3593 results.
3594 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3595 type = TREE_TYPE (lhs);
3597 cst0 = build_int_cst (type, 0);
3598 cst1 = build_int_cst (type, 1);
3599 tmp = vect_recog_temp_ssa_var (type, NULL);
3600 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3602 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3604 tree new_vectype = get_vectype_for_scalar_type (type);
3605 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3606 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3607 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3608 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3610 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3611 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3615 *type_out = vectype;
3616 stmts->safe_push (last_stmt);
3617 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3619 return pattern_stmt;
3621 else if (rhs_code == COND_EXPR
3622 && TREE_CODE (var) == SSA_NAME)
3624 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3625 if (vectype == NULL_TREE)
3626 return NULL;
3628 /* Build a scalar type for the boolean result that when
3629 vectorized matches the vector type of the result in
3630 size and number of elements. */
3631 unsigned prec
3632 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3633 TYPE_VECTOR_SUBPARTS (vectype));
3635 tree type
3636 = build_nonstandard_integer_type (prec,
3637 TYPE_UNSIGNED (TREE_TYPE (var)));
3638 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3639 return NULL;
3641 if (!check_bool_pattern (var, vinfo, bool_stmts))
3642 return NULL;
3644 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3646 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3647 pattern_stmt
3648 = gimple_build_assign (lhs, COND_EXPR,
3649 build2 (NE_EXPR, boolean_type_node,
3650 rhs, build_int_cst (type, 0)),
3651 gimple_assign_rhs2 (last_stmt),
3652 gimple_assign_rhs3 (last_stmt));
3653 *type_out = vectype;
3654 stmts->safe_push (last_stmt);
3655 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3657 return pattern_stmt;
3659 else if (rhs_code == SSA_NAME
3660 && STMT_VINFO_DATA_REF (stmt_vinfo))
3662 stmt_vec_info pattern_stmt_info;
3663 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3664 gcc_assert (vectype != NULL_TREE);
3665 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3666 return NULL;
3668 if (check_bool_pattern (var, vinfo, bool_stmts))
3669 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3670 else
3672 tree type = search_type_for_mask (var, vinfo);
3673 tree cst0, cst1, new_vectype;
3675 if (!type)
3676 return NULL;
3678 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3679 type = TREE_TYPE (vectype);
3681 cst0 = build_int_cst (type, 0);
3682 cst1 = build_int_cst (type, 1);
3683 new_vectype = get_vectype_for_scalar_type (type);
3685 rhs = vect_recog_temp_ssa_var (type, NULL);
3686 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3688 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3689 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3690 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3691 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3694 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3695 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3697 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3698 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3699 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3700 rhs = rhs2;
3702 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3703 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3704 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3705 STMT_VINFO_DATA_REF (pattern_stmt_info)
3706 = STMT_VINFO_DATA_REF (stmt_vinfo);
3707 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3708 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3709 *type_out = vectype;
3710 stmts->safe_push (last_stmt);
3711 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3713 return pattern_stmt;
3715 else
3716 return NULL;
3720 /* A helper for vect_recog_mask_conversion_pattern. Build
3721 conversion of MASK to a type suitable for masking VECTYPE.
3722 The built statement is given the required vectype and appended
3723 to STMT_VINFO's pattern definition sequence.
3725 Return converted mask. */
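/* As an illustrative sketch (SSA names invented for the example, not
   taken from the source): if MASK was computed from comparisons of ints
   but is needed to mask an operation on a vector of doubles, this helper
   emits roughly

     patt_mask_2 = (<element type of the same-sized truth vector>) mask_1;

   and records build_same_sized_truth_vector_type (VECTYPE) as the new
   statement's vector type, so the converted mask has one element per
   lane of VECTYPE.  */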
3727 static tree
3728 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3729 vec_info *vinfo)
3731 gimple *stmt;
3732 tree masktype, tmp;
3733 stmt_vec_info new_stmt_info;
3735 masktype = build_same_sized_truth_vector_type (vectype);
3736 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3737 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3738 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3739 set_vinfo_for_stmt (stmt, new_stmt_info);
3740 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3741 append_pattern_def_seq (stmt_vinfo, stmt);
3743 return tmp;
3747 /* Function vect_recog_mask_conversion_pattern
3749 Try to find statements which require boolean type
3750 conversion. Additional conversion statements are
3751 added to handle such cases. For example:
3753 bool m_1, m_2, m_3;
3754 int i_4, i_5;
3755 double d_6, d_7;
3756 char c_1, c_2, c_3;
3758 S1 m_1 = i_4 > i_5;
3759 S2 m_2 = d_6 < d_7;
3760 S3 m_3 = m_1 & m_2;
3761 S4 c_1 = m_3 ? c_2 : c_3;
3763 Will be transformed into:
3765 S1 m_1 = i_4 > i_5;
3766 S2 m_2 = d_6 < d_7;
3767 S3'' m_2' = (_Bool[bitsize=32])m_2
3768 S3' m_3' = m_1 & m_2';
3769 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3770 S4' c_1' = m_3'' ? c_2 : c_3; */
3772 static gimple *
3773 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_out)
3775 gimple *last_stmt = stmts->pop ();
3776 enum tree_code rhs_code;
3777 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3778 tree vectype1, vectype2;
3779 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3780 stmt_vec_info pattern_stmt_info;
3781 vec_info *vinfo = stmt_vinfo->vinfo;
3783 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3784 if (is_gimple_call (last_stmt)
3785 && gimple_call_internal_p (last_stmt)
3786 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3787 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3789 gcall *pattern_stmt;
3790 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3792 if (load)
3794 lhs = gimple_call_lhs (last_stmt);
3795 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3797 else
3799 rhs2 = gimple_call_arg (last_stmt, 3);
3800 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3803 rhs1 = gimple_call_arg (last_stmt, 2);
3804 rhs1_type = search_type_for_mask (rhs1, vinfo);
3805 if (!rhs1_type)
3806 return NULL;
3807 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3809 if (!vectype1 || !vectype2
3810 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3811 TYPE_VECTOR_SUBPARTS (vectype2)))
3812 return NULL;
3814 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3816 if (load)
3818 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3819 pattern_stmt
3820 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3821 gimple_call_arg (last_stmt, 0),
3822 gimple_call_arg (last_stmt, 1),
3823 tmp);
3824 gimple_call_set_lhs (pattern_stmt, lhs);
3826 else
3827 pattern_stmt
3828 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3829 gimple_call_arg (last_stmt, 0),
3830 gimple_call_arg (last_stmt, 1),
3831 tmp,
3832 gimple_call_arg (last_stmt, 3));
3834 gimple_call_set_nothrow (pattern_stmt, true);
3836 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3837 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3838 STMT_VINFO_DATA_REF (pattern_stmt_info)
3839 = STMT_VINFO_DATA_REF (stmt_vinfo);
3840 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3841 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3842 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
3843 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
3845 *type_out = vectype1;
3846 stmts->safe_push (last_stmt);
3847 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3849 return pattern_stmt;
3852 if (!is_gimple_assign (last_stmt))
3853 return NULL;
3855 gimple *pattern_stmt;
3856 lhs = gimple_assign_lhs (last_stmt);
3857 rhs1 = gimple_assign_rhs1 (last_stmt);
3858 rhs_code = gimple_assign_rhs_code (last_stmt);
3860 /* Check for cond expression requiring mask conversion. */
3861 if (rhs_code == COND_EXPR)
3863 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3865 if (TREE_CODE (rhs1) == SSA_NAME)
3867 rhs1_type = search_type_for_mask (rhs1, vinfo);
3868 if (!rhs1_type)
3869 return NULL;
3871 else if (COMPARISON_CLASS_P (rhs1))
3873 /* Check whether we're comparing scalar booleans and (if so)
3874 whether a better mask type exists than the mask associated
3875 with boolean-sized elements. This avoids unnecessary packs
3876 and unpacks if the booleans are set from comparisons of
3877 wider types. E.g. in:
3879 int x1, x2, x3, x4, y1, y2;
3881 bool b1 = (x1 == x2);
3882 bool b2 = (x3 == x4);
3883 ... = b1 == b2 ? y1 : y2;
3885 it is better for b1 and b2 to use the mask type associated
3886 with int elements rather than bool (byte) elements. */
3887 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3888 if (!rhs1_type)
3889 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3891 else
3892 return NULL;
3894 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3896 if (!vectype1 || !vectype2)
3897 return NULL;
3899 /* Continue if a conversion is needed. Also continue if we have
3900 a comparison whose vector type would normally be different from
3901 VECTYPE2 when considered in isolation. In that case we'll
3902 replace the comparison with an SSA name (so that we can record
3903 its vector type) and behave as though the comparison was an SSA
3904 name from the outset. */
3905 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3906 TYPE_VECTOR_SUBPARTS (vectype2))
3907 && (TREE_CODE (rhs1) == SSA_NAME
3908 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
3909 return NULL;
3911 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
3912 in place; we can handle it in vectorizable_condition. This avoids
3913 unnecessary promotion stmts and an increased vectorization factor. */
3914 if (COMPARISON_CLASS_P (rhs1)
3915 && INTEGRAL_TYPE_P (rhs1_type)
3916 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
3917 TYPE_VECTOR_SUBPARTS (vectype2)))
3919 enum vect_def_type dt;
3920 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
3921 && dt == vect_external_def
3922 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
3923 && (dt == vect_external_def
3924 || dt == vect_constant_def))
3926 tree wide_scalar_type = build_nonstandard_integer_type
3927 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
3928 TYPE_UNSIGNED (rhs1_type));
3929 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
3930 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
3931 return NULL;
3935 /* If rhs1 is a comparison we need to move it into a
3936 separate statement. */
3937 if (TREE_CODE (rhs1) != SSA_NAME)
3939 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3940 pattern_stmt = gimple_build_assign (tmp, rhs1);
3941 rhs1 = tmp;
3943 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3944 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3945 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3946 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3949 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
3950 TYPE_VECTOR_SUBPARTS (vectype2)))
3951 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3952 else
3953 tmp = rhs1;
3955 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3956 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3957 gimple_assign_rhs2 (last_stmt),
3958 gimple_assign_rhs3 (last_stmt));
3960 *type_out = vectype1;
3961 stmts->safe_push (last_stmt);
3962 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3964 return pattern_stmt;
3967 /* Now check for binary boolean operations requiring conversion of
3968 one of the operands. */
3969 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
3970 return NULL;
3972 if (rhs_code != BIT_IOR_EXPR
3973 && rhs_code != BIT_XOR_EXPR
3974 && rhs_code != BIT_AND_EXPR
3975 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
3976 return NULL;
3978 rhs2 = gimple_assign_rhs2 (last_stmt);
3980 rhs1_type = search_type_for_mask (rhs1, vinfo);
3981 rhs2_type = search_type_for_mask (rhs2, vinfo);
3983 if (!rhs1_type || !rhs2_type
3984 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3985 return NULL;
3987 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3989 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3990 if (!vectype1)
3991 return NULL;
3992 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3994 else
3996 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3997 if (!vectype1)
3998 return NULL;
3999 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4002 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4003 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4005 *type_out = vectype1;
4006 stmts->safe_push (last_stmt);
4007 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4009 return pattern_stmt;
4012 /* STMT is a load or store. If the load or store is conditional, return
4013 the boolean condition under which it occurs, otherwise return null. */
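/* For example (a sketch with invented SSA names): for a conditional
   load represented as the internal call

     lhs_1 = IFN_MASK_LOAD (ptr_2, align_3, mask_4);

   this returns mask_4, while for a plain (unconditional) assignment
   load or store it returns NULL_TREE.  */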
4015 static tree
4016 vect_get_load_store_mask (gimple *stmt)
4018 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4020 gcc_assert (gimple_assign_single_p (def_assign));
4021 return NULL_TREE;
4024 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4026 internal_fn ifn = gimple_call_internal_fn (def_call);
4027 int mask_index = internal_fn_mask_index (ifn);
4028 return gimple_call_arg (def_call, mask_index);
4031 gcc_unreachable ();
4034 /* Return the scalar offset type that an internal gather/scatter function
4035 should use. GS_INFO describes the gather/scatter operation. */
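/* For instance (a sketch): with 64-bit elements and a 32-bit unsigned
   offset this returns a 64-bit unsigned integer type; an offset that
   already has the element width is returned unchanged.  */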
4037 static tree
4038 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4040 tree offset_type = TREE_TYPE (gs_info->offset);
4041 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4043 /* Enforced by vect_check_gather_scatter. */
4044 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4045 gcc_assert (element_bits >= offset_bits);
4047 /* If the offset is narrower than the elements, extend it according
4048 to its sign. */
4049 if (element_bits > offset_bits)
4050 return build_nonstandard_integer_type (element_bits,
4051 TYPE_UNSIGNED (offset_type));
4053 return offset_type;
4056 /* Return MASK if MASK is suitable for masking an operation on vectors
4057 of type VECTYPE, otherwise convert it into such a form and return
4058 the result. Associate any conversion statements with STMT_INFO's
4059 pattern. */
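/* Roughly (a sketch): if the mask's natural mask vector type has a
   different number of elements than VECTYPE - say a mask derived from
   comparisons of ints being used with a vector of chars - a
   build_mask_conversion statement is added to STMT_INFO's pattern;
   otherwise MASK is returned unchanged.  */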
4061 static tree
4062 vect_convert_mask_for_vectype (tree mask, tree vectype,
4063 stmt_vec_info stmt_info, vec_info *vinfo)
4065 tree mask_type = search_type_for_mask (mask, vinfo);
4066 if (mask_type)
4068 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4069 if (mask_vectype
4070 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4071 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4072 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4074 return mask;
4077 /* Return the equivalent of:
4079 fold_convert (TYPE, VALUE)
4081 with the expectation that the operation will be vectorized.
4082 If new statements are needed, add them as pattern statements
4083 to STMT_INFO. */
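/* A minimal sketch (invented names): converting a 32-bit offset off_1
   to a wider TYPE would append

     patt_off_2 = (TYPE) off_1;

   to STMT_INFO's pattern definition sequence and return patt_off_2.
   If VALUE already has TYPE, VALUE itself is returned.  */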
4085 static tree
4086 vect_add_conversion_to_pattern (tree type, tree value,
4087 stmt_vec_info stmt_info,
4088 vec_info *vinfo)
4090 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4091 return value;
4093 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4094 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4095 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4096 set_vinfo_for_stmt (conversion, new_stmt_info);
4097 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4098 append_pattern_def_seq (stmt_info, conversion);
4099 return new_value;
4102 /* Try to convert STMT into a call to a gather load or scatter store
4103 internal function. Return the final statement on success and set
4104 *TYPE_OUT to the vector type being loaded or stored.
4106 This function only handles gathers and scatters that were recognized
4107 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
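/* As a sketch (SSA names invented): a masked gather load recognized
   here may be replaced by an internal call such as

     patt_x_1 = IFN_MASK_GATHER_LOAD (base_2, offset_3, scale, mask_4);

   and an unmasked scatter store by

     IFN_SCATTER_STORE (base_2, offset_3, scale, rhs_5);

   mirroring the calls built below.  */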
4109 static gimple *
4110 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4111 tree *type_out)
4113 /* Currently we only support this for loop vectorization. */
4114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4115 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4116 if (!loop_vinfo)
4117 return NULL;
4119 /* Make sure that we're looking at a gather load or scatter store. */
4120 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4121 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4122 return NULL;
4124 /* Get the boolean that controls whether the load or store happens.
4125 This is null if the operation is unconditional. */
4126 tree mask = vect_get_load_store_mask (stmt);
4128 /* Make sure that the target supports an appropriate internal
4129 function for the gather/scatter operation. */
4130 gather_scatter_info gs_info;
4131 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4132 || gs_info.decl)
4133 return NULL;
4135 /* Convert the mask to the right form. */
4136 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4137 if (mask)
4138 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4139 loop_vinfo);
4141 /* Get the invariant base and non-invariant offset, converting the
4142 latter to the same width as the vector elements. */
4143 tree base = gs_info.base;
4144 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4145 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4146 last_stmt_info, loop_vinfo);
4148 /* Build the new pattern statement. */
4149 tree scale = size_int (gs_info.scale);
4150 gcall *pattern_stmt;
4151 if (DR_IS_READ (dr))
4153 if (mask != NULL)
4154 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4155 offset, scale, mask);
4156 else
4157 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4158 offset, scale);
4159 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4160 gimple_call_set_lhs (pattern_stmt, load_lhs);
4162 else
4164 tree rhs = vect_get_store_rhs (stmt);
4165 if (mask != NULL)
4166 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4167 base, offset, scale, rhs,
4168 mask);
4169 else
4170 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4171 base, offset, scale, rhs);
4173 gimple_call_set_nothrow (pattern_stmt, true);
4175 /* Copy across relevant vectorization info and associate DR with the
4176 new pattern statement instead of the original statement. */
4177 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4178 loop_vinfo);
4179 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4180 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4181 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4182 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4183 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4184 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4186 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4187 *type_out = vectype;
4188 vect_pattern_detected ("gather/scatter pattern", stmt);
4190 return pattern_stmt;
4193 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4195 static gimple *
4196 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_out)
4198 gimple *last_stmt = stmts->pop ();
4199 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4200 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4201 last_stmt_info,
4202 type_out);
4203 if (pattern_stmt)
4204 stmts->safe_push (last_stmt);
4205 return pattern_stmt;
4208 typedef gimple *(*vect_recog_func_ptr) (vec<gimple *> *, tree *);
4210 struct vect_recog_func
4212 vect_recog_func_ptr fn;
4213 const char *name;
4216 /* Note that ordering matters - the first pattern matching on a stmt is
4217 taken, which means the more complex pattern usually needs to precede
4218 the less complex ones (widen_sum only after dot_prod or sad, for example). */
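/* E.g. a dot-product computation would also match the simpler widen_sum
   pattern, so dot_prod (and sad) are listed before widen_sum and win;
   likewise the gather/scatter entry must run before the generic mask
   conversion entry, as noted below.  */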
4219 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4220 { vect_recog_widen_mult_pattern, "widen_mult" },
4221 { vect_recog_dot_prod_pattern, "dot_prod" },
4222 { vect_recog_sad_pattern, "sad" },
4223 { vect_recog_widen_sum_pattern, "widen_sum" },
4224 { vect_recog_pow_pattern, "pow" },
4225 { vect_recog_widen_shift_pattern, "widen_shift" },
4226 { vect_recog_over_widening_pattern, "over_widening" },
4227 { vect_recog_rotate_pattern, "rotate" },
4228 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4229 { vect_recog_divmod_pattern, "divmod" },
4230 { vect_recog_mult_pattern, "mult" },
4231 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4232 { vect_recog_bool_pattern, "bool" },
4233 /* This must come before mask conversion, and includes the parts
4234 of mask conversion that are needed for gather and scatter
4235 internal functions. */
4236 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4237 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4240 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4242 /* Mark statements that are involved in a pattern. */
4244 static inline void
4245 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4246 tree pattern_vectype)
4248 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4249 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4251 bool old_pattern_p = is_pattern_stmt_p (orig_stmt_info);
4252 if (old_pattern_p)
4254 /* We're replacing a statement in an existing pattern definition
4255 sequence. */
4256 if (dump_enabled_p ())
4258 dump_printf_loc (MSG_NOTE, vect_location,
4259 "replacing earlier pattern ");
4260 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_stmt, 0);
4263 /* To keep the book-keeping simple, just swap the lhs of the
4264 old and new statements, so that the old one has a valid but
4265 unused lhs. */
4266 tree old_lhs = gimple_get_lhs (orig_stmt);
4267 gimple_set_lhs (orig_stmt, gimple_get_lhs (pattern_stmt));
4268 gimple_set_lhs (pattern_stmt, old_lhs);
4270 if (dump_enabled_p ())
4272 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4273 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4276 /* Switch to the statement that ORIG replaces. */
4277 orig_stmt_info
4278 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (orig_stmt_info));
4280 /* We shouldn't be replacing the main pattern statement. */
4281 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) != orig_stmt);
4284 if (def_seq)
4285 for (gimple_stmt_iterator si = gsi_start (def_seq);
4286 !gsi_end_p (si); gsi_next (&si))
4287 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4289 if (old_pattern_p)
4291 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4293 /* Insert all the new pattern statements before the original one. */
4294 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4295 gimple_stmt_iterator gsi = gsi_for_stmt (orig_stmt, orig_def_seq);
4296 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4297 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4299 /* Remove the pattern statement that this new pattern replaces. */
4300 gsi_remove (&gsi, false);
4302 else
4303 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4306 /* Function vect_pattern_recog_1
4308 Input:
4309 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4310 computation pattern.
4311 STMT: A stmt from which the pattern search should start.
4313 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4314 a sequence of statements that has the same functionality and can be
4315 used to replace STMT. It returns the last statement in the sequence
4316 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4317 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4318 statement, having first checked that the target supports the new operation
4319 in that type.
4321 This function also does some bookkeeping, as explained in the documentation
4322 for vect_pattern_recog. */
4324 static void
4325 vect_pattern_recog_1 (vect_recog_func *recog_func,
4326 gimple_stmt_iterator si,
4327 vec<gimple *> *stmts_to_replace)
4329 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4330 stmt_vec_info stmt_info;
4331 loop_vec_info loop_vinfo;
4332 tree pattern_vectype;
4333 int i;
4335 /* If this statement has already been replaced with pattern statements,
4336 leave the original statement alone, since the first match wins.
4337 Instead try to match against the definition statements that feed
4338 the main pattern statement. */
4339 stmt_info = vinfo_for_stmt (stmt);
4340 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4342 gimple_stmt_iterator gsi;
4343 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4344 !gsi_end_p (gsi); gsi_next (&gsi))
4345 vect_pattern_recog_1 (recog_func, gsi, stmts_to_replace);
4346 return;
4349 stmts_to_replace->truncate (0);
4350 stmts_to_replace->quick_push (stmt);
4351 pattern_stmt = recog_func->fn (stmts_to_replace, &pattern_vectype);
4352 if (!pattern_stmt)
4354 /* Clear any related stmt info that analysis might have noted
4355 for the stmts that are to be replaced. */
4356 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4357 && (unsigned) i < stmts_to_replace->length ();
4358 i++)
4360 stmt_info = vinfo_for_stmt (stmt);
4361 if (!is_pattern_stmt_p (stmt_info))
4362 STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
4364 /* Clear any half-formed pattern definition sequence. */
4365 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4366 return;
4369 stmt = stmts_to_replace->last ();
4370 stmt_info = vinfo_for_stmt (stmt);
4371 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4372 gcc_assert (pattern_vectype);
4374 /* Found a vectorizable pattern. */
4375 if (dump_enabled_p ())
4377 dump_printf_loc (MSG_NOTE, vect_location,
4378 "%s pattern recognized: ", recog_func->name);
4379 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4382 /* Mark the stmts that are involved in the pattern. */
4383 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4385 /* Patterns cannot be vectorized using SLP, because they change the order of
4386 computation. */
4387 if (loop_vinfo)
4389 unsigned ix, ix2;
4390 gimple **elem_ptr;
4391 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4392 elem_ptr, *elem_ptr == stmt);
4395 /* It is possible that additional pattern stmts are created and inserted in
4396 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4397 relevant statements. */
4398 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4399 && (unsigned) i < (stmts_to_replace->length () - 1);
4400 i++)
4402 stmt_info = vinfo_for_stmt (stmt);
4403 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4404 if (dump_enabled_p ())
4406 dump_printf_loc (MSG_NOTE, vect_location,
4407 "additional pattern stmt: ");
4408 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4411 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4414 return;
4418 /* Function vect_pattern_recog
4420 Input:
4421 VINFO - the vectorization info (for a loop or basic-block region) in
4422 which we want to look for computation idioms.
4424 Output - for each computation idiom that is detected we create a new stmt
4425 that provides the same functionality and that can be vectorized. We
4426 also record some information in the struct_stmt_info of the relevant
4427 stmts, as explained below:
4429 At the entry to this function we have the following stmts, with the
4430 following initial value in the STMT_VINFO fields:
4432 stmt in_pattern_p related_stmt vec_stmt
4433 S1: a_i = .... - - -
4434 S2: a_2 = ..use(a_i).. - - -
4435 S3: a_1 = ..use(a_2).. - - -
4436 S4: a_0 = ..use(a_1).. - - -
4437 S5: ... = ..use(a_0).. - - -
4439 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4440 represented by a single stmt. We then:
4441 - create a new stmt S6 equivalent to the pattern (the stmt is not
4442 inserted into the code)
4443 - fill in the STMT_VINFO fields as follows:
4445 in_pattern_p related_stmt vec_stmt
4446 S1: a_i = .... - - -
4447 S2: a_2 = ..use(a_i).. - - -
4448 S3: a_1 = ..use(a_2).. - - -
4449 S4: a_0 = ..use(a_1).. true S6 -
4450 '---> S6: a_new = .... - S4 -
4451 S5: ... = ..use(a_0).. - - -
4453 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4454 to each other through the RELATED_STMT field).
4456 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4457 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4458 remain irrelevant unless used by stmts other than S4.
4460 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4461 (because they are marked as irrelevant). It will vectorize S6, and record
4462 a pointer to the new vector stmt VS6 from S6 (as usual).
4463 S4 will be skipped, and S5 will be vectorized as usual:
4465 in_pattern_p related_stmt vec_stmt
4466 S1: a_i = .... - - -
4467 S2: a_2 = ..use(a_i).. - - -
4468 S3: a_1 = ..use(a_2).. - - -
4469 > VS6: va_new = .... - - -
4470 S4: a_0 = ..use(a_1).. true S6 VS6
4471 '---> S6: a_new = .... - S4 VS6
4472 > VS5: ... = ..vuse(va_new).. - - -
4473 S5: ... = ..use(a_0).. - - -
4475 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4476 elsewhere), and we'll end up with:
4478 VS6: va_new = ....
4479 VS5: ... = ..vuse(va_new)..
4481 In case of more than one pattern statements, e.g., widen-mult with
4482 intermediate type:
4484 S1 a_t = ;
4485 S2 a_T = (TYPE) a_t;
4486 '--> S3: a_it = (interm_type) a_t;
4487 S4 prod_T = a_T * CONST;
4488 '--> S5: prod_T' = a_it w* CONST;
4490 there may be other users of a_T outside the pattern. In that case S2 will
4491 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4492 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4493 be recorded in S3. */
4495 void
4496 vect_pattern_recog (vec_info *vinfo)
4498 struct loop *loop;
4499 basic_block *bbs;
4500 unsigned int nbbs;
4501 gimple_stmt_iterator si;
4502 unsigned int i, j;
4503 auto_vec<gimple *, 1> stmts_to_replace;
4505 DUMP_VECT_SCOPE ("vect_pattern_recog");
4507 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4509 loop = LOOP_VINFO_LOOP (loop_vinfo);
4510 bbs = LOOP_VINFO_BBS (loop_vinfo);
4511 nbbs = loop->num_nodes;
4513 /* Scan through the loop stmts, applying the pattern recognition
4514 functions starting at each stmt visited: */
4515 for (i = 0; i < nbbs; i++)
4517 basic_block bb = bbs[i];
4518 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4519 /* Scan over all generic vect_recog_xxx_pattern functions. */
4520 for (j = 0; j < NUM_PATTERNS; j++)
4521 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4522 &stmts_to_replace);
4525 else
4527 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4528 for (si = bb_vinfo->region_begin;
4529 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4531 gimple *stmt = gsi_stmt (si);
4532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4533 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4534 continue;
4536 /* Scan over all generic vect_recog_xxx_pattern functions. */
4537 for (j = 0; j < NUM_PATTERNS; j++)
4538 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4539 &stmts_to_replace);