Ensure PATTERN_DEF_SEQ is empty before recognising patterns
gcc/tree-vect-patterns.c
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"
/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_type vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }
  else
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
	}
      return false;
    }
}
/* Report that we've found an instance of pattern PATTERN in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }
}
/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO.
   Set its vector type to VECTYPE if it doesn't have one already.  */

static void
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
			tree vectype)
{
  stmt_vec_info pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
					     orig_stmt_info->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info->stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
}
/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
		       tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
}
/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE)
{
  vec_info *vinfo = stmt_info->vinfo;
  if (vectype)
    {
      gcc_assert (!vinfo_for_stmt (new_stmt));
      stmt_vec_info new_stmt_info = new_stmt_vec_info (new_stmt, vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}
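
/* Editor's illustration (not part of the original source): a recognizer
   that needs an intermediate statement, say a widening multiply computed
   in a narrower vector type, would typically build it and queue it with

     tree tmp = vect_recog_temp_ssa_var (half_type, NULL);
     gassign *mul = gimple_build_assign (tmp, WIDEN_MULT_EXPR, a, b);
     append_pattern_def_seq (stmt_info, mul, half_vectype);

   before returning the final pattern statement.  The names half_type,
   half_vectype, a and b are hypothetical.  */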
/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}
/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
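
/* Editor's illustration (not part of the original source): the dot-product
   recognizer below effectively asks

     vect_supportable_direct_optab_p (int_type, DOT_PROD_EXPR, short_type,
				      &vecotype, &vecitype);

   which succeeds only if the target has a dot-product instruction for the
   vector mode of short_type whose output mode matches the vector mode of
   int_type.  int_type and short_type stand for hypothetical scalar types.  */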
/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}
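
/* Editor's illustration (not part of the original source), assuming
   BITS_PER_UNIT == 8:

     vect_element_precision (1) == 8
     vect_element_precision (8) == 8
     vect_element_precision (9) == 16
     vect_element_precision (17) == 32  */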
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  vect_def_type dt;
  gimple *def_stmt;
  if (TREE_CODE (op) != SSA_NAME
      || !vect_is_simple_use (op, vinfo, &dt, &def_stmt)
      || dt != vect_internal_def)
    return NULL;

  return vinfo_for_stmt (def_stmt);
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
    return false;

  return true;
}
/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
struct vect_unpromoted_value {
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}
/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}
/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= orig_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    unprom->set_op (op, dt, caster);
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      if (dt == vect_internal_def)
	{
	  caster = vinfo_for_stmt (def_stmt);
	  /* Ignore pattern statements, since we don't link uses for them.  */
	  if (single_use_p
	      && !STMT_VINFO_RELATED_STMT (caster)
	      && !has_single_use (res))
	    *single_use_p = false;
	}
      else
	caster = NULL;
      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}
/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}
/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
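
/* Editor's illustration (not part of the original source): with a 32-bit
   result TYPE, jousting an unsigned 8-bit *COMMON_TYPE against a signed
   8-bit NEW_TYPE hits the mismatched-signs case: neither type holds all
   values of the other, so the precision is doubled to 16 and *COMMON_TYPE
   becomes a signed 16-bit type, which is still narrower than TYPE.  */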
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
						     this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo_for_stmt (SSA_NAME_DEF_STMT (this_unprom->op));
	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
					   shift_p, max_nops, this_unprom,
					   common_type);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		return 0;
	    }
	}
      next_op += nops;
    }
  return next_op;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt2_info));
      vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Splitting pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
	}

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info->stmt)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Splitting statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
	}

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0);
	}

      return true;
    }
}
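
/* Editor's illustration (not part of the original source): given a scalar
   cast  int res = (int) c  for a char c, where a pattern wants a 16-bit
   midpoint, vect_split_statement can rewrite it as the two pattern
   statements

     short tmp = (short) c;
     int res = (int) tmp;

   with the first cast placed in the pattern definition sequence.  */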
/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  /* See if we can reuse an existing result.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (types_compatible_p (TREE_TYPE (lhs), type))
	return lhs;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);

  /* If the operation is the input to a vectorizable cast, try splitting
     that cast into two, taking the required result as a mid-way point.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
	  && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
	  && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
	  && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
	return new_op;
    }

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (stmt_info, new_stmt, vectype);
  return new_op;
}
/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;
      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
    }
}
/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
		     tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}
/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_VINFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
	  : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
}
/* As above, but also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, assign))
    return false;

  if (!vect_reassociating_reduction_p (stmt_info))
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  return true;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
static gimple *
vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
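
/* Editor's illustration (not part of the original source): a scalar loop
   of the shape

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += a[i] * b[i];

   matches this pattern (S3-S7 above) and becomes a DOT_PROD_EXPR when the
   target provides the corresponding optab.  */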
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	SAD_EXPR <x_t, y_t, sum_0>
  */
static gimple *
vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  DAD = (TYPE2) DAD;
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
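
/* Editor's illustration (not part of the original source): the classic
   SAD kernel

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += abs (a[i] - b[i]);

   is recognized here (the subtraction is performed in a wider signed
   type after the usual promotions) and becomes SAD_EXPR on targets
   that provide it.  */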
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */
static gimple *
vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (half_type);
  tree vecitype = get_vectype_for_scalar_type (itype);
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (wide_code, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
}
/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
				      WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}
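
/* Editor's illustration (not part of the original source): this converts
   e.g.

     short a, b;
     int prod = (int) a * (int) b;

   into a WIDEN_MULT_EXPR on the narrow inputs, assuming the target has a
   widening multiply for the vector form of 'short'.  */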
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	x = x * x
   or
	x = sqrt (x)
  */
static gimple *
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && !gimple_call_internal_p (last_stmt))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}
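
/* Editor's illustration (not part of the original source): with this
   pattern,  y = pow (x, 2.0)  becomes  y = x * x,  and
   y = pow (x, 0.5)  becomes an IFN_SQRT call, provided the target
   supports vector multiplication or vector square root respectively.  */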
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
static gimple *
vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
					type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}
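
/* Editor's illustration (not part of the original source): a loop of the
   shape

     short a[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += a[i];

   matches {S2,S3} above and becomes WIDEN_SUM_EXPR <a[i], sum_0> when the
   target provides a widening-sum optab for these modes.  */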
/* Recognize cases in which an operation is performed in one type WTYPE
   but could be done more efficiently in a narrower type NTYPE.  For example,
   if we have:

     ATYPE a;  // narrower than NTYPE
     BTYPE b;  // narrower than NTYPE
     WTYPE aw = (WTYPE) a;
     WTYPE bw = (WTYPE) b;
     WTYPE res = aw + bw;  // only uses of aw and bw

   then it would be more efficient to do:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   Other situations include things like:

     ATYPE a;  // NTYPE or narrower
     WTYPE aw = (WTYPE) a;
     WTYPE res = aw + b;

   when only "(NTYPE) res" is significant.  In that case it's more efficient
   to truncate "b" and do the operation on NTYPE instead:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;  // truncation
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   All users of "res" should then use "resn" instead, making the final
   statement dead (not marked as relevant).  The final statement is still
   needed to maintain the type correctness of the IR.

   vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
   by users of the result.  */
static gimple *
vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;

  /* See whether we have found that this operation can be done on a
     narrower type without changing its semantics.  */
  unsigned int new_precision = last_stmt_info->operation_precision;
  if (!new_precision)
    return NULL;

  vec_info *vinfo = last_stmt_info->vinfo;
  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  tree_code code = gimple_assign_rhs_code (last_stmt);

  /* Keep the first operand of a COND_EXPR as-is: only the other two
     operands are interesting.  */
  unsigned int first_op = (code == COND_EXPR ? 2 : 1);

  /* Check the operands.  */
  unsigned int nops = gimple_num_ops (last_stmt) - first_op;
  auto_vec <vect_unpromoted_value, 3> unprom (nops);
  unprom.quick_grow (nops);
  unsigned int min_precision = 0;
  bool single_use_p = false;
  for (unsigned int i = 0; i < nops; ++i)
    {
      tree op = gimple_op (last_stmt, first_op + i);
      if (TREE_CODE (op) == INTEGER_CST)
	unprom[i].set_op (op, vect_constant_def);
      else if (TREE_CODE (op) == SSA_NAME)
	{
	  bool op_single_use_p = true;
	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
						     &op_single_use_p))
	    return NULL;
	  /* If:

	     (1) N bits of the result are needed;
	     (2) all inputs are widened from M<N bits; and
	     (3) one operand OP is a single-use SSA name

	     we can shift the M->N widening from OP to the output
	     without changing the number or type of extensions involved.
	     This then reduces the number of copies of STMT_INFO.

	     If instead of (3) more than one operand is a single-use SSA name,
	     shifting the extension to the output is even more of a win.

	     If instead:

	     (1) N bits of the result are needed;
	     (2) one operand OP2 is widened from M2<N bits;
	     (3) another operand OP1 is widened from M1<M2 bits; and
	     (4) both OP1 and OP2 are single-use

	     the choice is between:

	     (a) truncating OP2 to M1, doing the operation on M1,
		 and then widening the result to N

	     (b) widening OP1 to M2, doing the operation on M2, and then
		 widening the result to N

	     Both shift the M2->N widening of the inputs to the output.
	     (a) additionally shifts the M1->M2 widening to the output;
	     it requires fewer copies of STMT_INFO but requires an extra
	     M2->M1 truncation.

	     Which is better will depend on the complexity and cost of
	     STMT_INFO, which is hard to predict at this stage.  However,
	     a clear tie-breaker in favor of (b) is the fact that the
	     truncation in (a) increases the length of the operation chain.

	     If instead of (4) only one of OP1 or OP2 is single-use,
	     (b) is still a win over doing the operation in N bits:
	     it still shifts the M2->N widening on the single-use operand
	     to the output and reduces the number of STMT_INFO copies.

	     If neither operand is single-use then operating on fewer than
	     N bits might lead to more extensions overall.  Whether it does
	     or not depends on global information about the vectorization
	     region, and whether that's a good trade-off would again
	     depend on the complexity and cost of the statements involved,
	     as well as things like register pressure that are not normally
	     modelled at this stage.  We therefore ignore these cases
	     and just optimize the clear single-use wins above.

	     Thus we take the maximum precision of the unpromoted operands
	     and record whether any operand is single-use.  */
	  if (unprom[i].dt == vect_internal_def)
	    {
	      min_precision = MAX (min_precision,
				   TYPE_PRECISION (unprom[i].type));
	      single_use_p |= op_single_use_p;
	    }
	}
    }

  /* Although the operation could be done in operation_precision, we have
     to balance that against introducing extra truncations or extensions.
     Calculate the minimum precision that can be handled efficiently.

     The loop above determined that the operation could be handled
     efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
     extension from the inputs to the output without introducing more
     instructions, and would reduce the number of instructions required
     for STMT_INFO itself.

     vect_determine_precisions has also determined that the result only
     needs min_output_precision bits.  Truncating by a factor of N times
     requires a tree of N - 1 instructions, so if TYPE is N times wider
     than min_output_precision, doing the operation in TYPE and truncating
     the result requires N + (N - 1) = 2N - 1 instructions per output vector.
     In contrast:

     - truncating the input to a unary operation and doing the operation
       in the new type requires at most N - 1 + 1 = N instructions per
       output vector

     - doing the same for a binary operation requires at most
       (N - 1) * 2 + 1 = 2N - 1 instructions per output vector

     Both unary and binary operations require fewer instructions than
     this if the operands were extended from a suitable truncated form.
     Thus there is usually nothing to lose by doing operations in
     min_output_precision bits, but there can be something to gain.  */
  if (!single_use_p)
    min_precision = last_stmt_info->min_output_precision;
  else
    min_precision = MIN (min_precision, last_stmt_info->min_output_precision);

  /* Apply the minimum efficient precision we just calculated.  */
  if (new_precision < min_precision)
    new_precision = min_precision;
  if (new_precision >= TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  /* We've found a viable pattern.  Get the new type of the operation.  */
  bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
  tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);

  /* We specifically don't check here whether the target supports the
     new operation, since it might be something that a later pattern
     wants to rewrite anyway.  If targets have a minimum element size
     for some optabs, we should pattern-match smaller ops to larger ops
     where beneficial.  */
  tree new_vectype = get_vectype_for_scalar_type (new_type);
  if (!new_vectype)
    return NULL;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "demoting ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
      dump_printf (MSG_NOTE, " to ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, new_type);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Calculate the rhs operands for an operation on NEW_TYPE.  */
  tree ops[3] = {};
  for (unsigned int i = 1; i < first_op; ++i)
    ops[i - 1] = gimple_op (last_stmt, i);
  vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
		       new_type, &unprom[0], new_vectype);

  /* Use the operation to produce a result of type NEW_TYPE.  */
  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code,
					      ops[0], ops[1], ops[2]);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created pattern stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  pattern_stmt = vect_convert_output (last_stmt_info, type,
				      pattern_stmt, new_vectype);

  return pattern_stmt;
}
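
/* Editor's illustration (not part of the original source): if only the
   low 8 bits of res are ever used in

     unsigned char a[N], b[N];
     int res = a[i] + b[i];

   the addition is demoted here to an 8-bit operation followed by a cast,
   letting the vector code process several times as many elements per
   instruction as the over-widened int version would.  */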
1705 /* Recognize the patterns:
1707 ATYPE a; // narrower than TYPE
1708 BTYPE b; // narrower than TYPE
1709 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1710 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1712 where only the bottom half of avg is used. Try to transform them into:
1714 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1715 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1717 followed by:
1719 TYPE avg = (TYPE) avg';
1721 where NTYPE is no wider than half of TYPE. Since only the bottom half
1722 of avg is used, all or part of the cast of avg' should become redundant. */
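/* An illustrative source fragment matching case (2) (editor's sketch;
   the array names are hypothetical):

     unsigned char a[N], b[N], r[N];
     for (int i = 0; i < n; ++i)
       r[i] = (a[i] + b[i] + 1) >> 1;

   The sum is computed in int, but only the low 8 bits of the shifted
   result are stored, so the loop can use IFN_AVG_CEIL directly on
   vectors of unsigned char.  */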
1724 static gimple *
1725 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1727 /* Check for a shift right by one bit. */
1728 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1729 vec_info *vinfo = last_stmt_info->vinfo;
1730 if (!last_stmt
1731 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1732 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1733 return NULL;
1735 /* Check that the shift result is wider than the users of the
1736 result need (i.e. that narrowing would be a natural choice). */
1737 tree lhs = gimple_assign_lhs (last_stmt);
1738 tree type = TREE_TYPE (lhs);
1739 unsigned int target_precision
1740 = vect_element_precision (last_stmt_info->min_output_precision);
1741 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1742 return NULL;
1744 /* Get the definition of the shift input. */
1745 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1746 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1747 if (!plus_stmt_info)
1748 return NULL;
1750 /* Check whether the shift input can be seen as a tree of additions on
1751 2 or 3 widened inputs.
1753 Note that the pattern should be a win even if the result of one or
1754 more additions is reused elsewhere: if the pattern matches, we'd be
1755 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1756 internal_fn ifn = IFN_AVG_FLOOR;
1757 vect_unpromoted_value unprom[3];
1758 tree new_type;
1759 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1760 PLUS_EXPR, false, 3,
1761 unprom, &new_type);
1762 if (nops == 0)
1763 return NULL;
1764 if (nops == 3)
1766 /* Check that one operand is 1. */
1767 unsigned int i;
1768 for (i = 0; i < 3; ++i)
1769 if (integer_onep (unprom[i].op))
1770 break;
1771 if (i == 3)
1772 return NULL;
1773 /* Throw away the 1 operand and keep the other two. */
1774 if (i < 2)
1775 unprom[i] = unprom[2];
1776 ifn = IFN_AVG_CEIL;
1779 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1781 /* We know that:
1783 (a) the operation can be viewed as:
1785 TYPE widened0 = (TYPE) UNPROM[0];
1786 TYPE widened1 = (TYPE) UNPROM[1];
1787 TYPE tmp1 = widened0 + widened1 {+ 1};
1788 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1790 (b) the first two statements are equivalent to:
1792 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1793 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1795 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1796 where sensible;
1798 (d) all the operations can be performed correctly at twice the width of
1799 NEW_TYPE, due to the nature of the average operation; and
1801 (e) users of the result of the right shift need only TARGET_PRECISION
1802 bits, where TARGET_PRECISION is no more than half of TYPE's
1803 precision.
1805 Under these circumstances, the only situation in which NEW_TYPE
1806 could be narrower than TARGET_PRECISION is if widened0, widened1
1807 and an addition result are all used more than once. Thus we can
1808 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1809 as "free", whereas widening the result of the average instruction
1810 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1811 therefore better not to go narrower than TARGET_PRECISION. */
1812 if (TYPE_PRECISION (new_type) < target_precision)
1813 new_type = build_nonstandard_integer_type (target_precision,
1814 TYPE_UNSIGNED (new_type));
1816 /* Check for target support. */
1817 tree new_vectype = get_vectype_for_scalar_type (new_type);
1818 if (!new_vectype
1819 || !direct_internal_fn_supported_p (ifn, new_vectype,
1820 OPTIMIZE_FOR_SPEED))
1821 return NULL;
1823 /* The IR requires a valid vector type for the cast result, even though
1824 it's likely to be discarded. */
1825 *type_out = get_vectype_for_scalar_type (type);
1826 if (!*type_out)
1827 return NULL;
1829 /* Generate the IFN_AVG* call. */
1830 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1831 tree new_ops[2];
1832 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1833 unprom, new_vectype);
1834 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1835 new_ops[1]);
1836 gimple_call_set_lhs (average_stmt, new_var);
1837 gimple_set_location (average_stmt, gimple_location (last_stmt));
1839 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_NOTE, vect_location,
1842 "created pattern stmt: ");
1843 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
1846 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1849 /* Recognize cases in which the input to a cast is wider than its
1850 output, and the input is fed by a widening operation. Fold this
1851 by removing the unnecessary intermediate widening. E.g.:
1853 unsigned char a;
1854 unsigned int b = (unsigned int) a;
1855 unsigned short c = (unsigned short) b;
1857        -->
1859      unsigned short c = (unsigned short) a;
1861 Although this is rare in input IR, it is an expected side-effect
1862 of the over-widening pattern above.
1864 This is beneficial also for integer-to-float conversions, if the
1865 widened integer has more bits than the float, and if the unwidened
1866 input doesn't. */
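/* An illustrative instance of the integer-to-float case (editor's
   sketch): in

     unsigned char a;
     unsigned long long b = (unsigned long long) a;
     float f = (float) b;

   the 64-bit intermediate has more bits than float's significand but
   the original 8-bit value does not, so f = (float) a gives the same
   result without the wide conversion.  */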
1868 static gimple *
1869 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1871 /* Check for a cast, including an integer-to-float conversion. */
1872 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1873 if (!last_stmt)
1874 return NULL;
1875 tree_code code = gimple_assign_rhs_code (last_stmt);
1876 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1877 return NULL;
1879 /* Make sure that the rhs is a scalar with a natural bitsize. */
1880 tree lhs = gimple_assign_lhs (last_stmt);
1881 if (!lhs)
1882 return NULL;
1883 tree lhs_type = TREE_TYPE (lhs);
1884 scalar_mode lhs_mode;
1885 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1886 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1887 return NULL;
1889 /* Check for a narrowing operation (from a vector point of view). */
1890 tree rhs = gimple_assign_rhs1 (last_stmt);
1891 tree rhs_type = TREE_TYPE (rhs);
1892 if (!INTEGRAL_TYPE_P (rhs_type)
1893 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1894 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1895 return NULL;
1897 /* Try to find an unpromoted input. */
1898 vec_info *vinfo = last_stmt_info->vinfo;
1899 vect_unpromoted_value unprom;
1900 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1901 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1902 return NULL;
1904 /* If the bits above RHS_TYPE matter, make sure that they're the
1905 same when extending from UNPROM as they are when extending from RHS. */
1906 if (!INTEGRAL_TYPE_P (lhs_type)
1907 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1908 return NULL;
1910 /* We can get the same result by casting UNPROM directly, to avoid
1911 the unnecessary widening and narrowing. */
1912 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1914 *type_out = get_vectype_for_scalar_type (lhs_type);
1915 if (!*type_out)
1916 return NULL;
1918 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1919 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1920 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1922 return pattern_stmt;
1925 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1926 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
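/* An illustrative instance of what this catches (editor's sketch of
   the scalar form; the exact IL shape depends on earlier narrowing):

     unsigned char a;
     unsigned short r = (unsigned short) a << 3;

   i.e. a left shift of a widened narrow input, which can be carried
   out as a single widening shift on the narrow vector type.  */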
1928 static gimple *
1929 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1931 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1932 WIDEN_LSHIFT_EXPR, true,
1933 "vect_recog_widen_shift_pattern");
1936 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1938 type a_t, b_t, c_t;
1940 S0 a_t = b_t r<< c_t;
1942 Input/Output:
1944 * STMT_VINFO: The stmt from which the pattern search begins,
1945 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1946 with a sequence:
1948 S1 d_t = -c_t;
1949 S2 e_t = d_t & (B - 1);
1950 S3 f_t = b_t << c_t;
1951 S4 g_t = b_t >> e_t;
1952 S0 a_t = f_t | g_t;
1954    where B is the element bitsize of type.
1956 Output:
1958 * TYPE_OUT: The type of the output of this pattern.
1960 * Return value: A new stmt that will be used to replace the rotate
1961 S0 stmt. */
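/* An illustrative expansion for B == 32 (editor's sketch): a left
   rotate a_t = b_t r<< c_t becomes

     d_t = -c_t;
     e_t = d_t & 31;
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;

   which is well defined even for c_t == 0, since the right shift
   amount is masked to 0 rather than computed as 32 - c_t.  */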
1963 static gimple *
1964 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1966 gimple *last_stmt = stmt_vinfo->stmt;
1967 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1968 gimple *pattern_stmt, *def_stmt;
1969 enum tree_code rhs_code;
1970 vec_info *vinfo = stmt_vinfo->vinfo;
1971 enum vect_def_type dt;
1972 optab optab1, optab2;
1973 edge ext_def = NULL;
1975 if (!is_gimple_assign (last_stmt))
1976 return NULL;
1978 rhs_code = gimple_assign_rhs_code (last_stmt);
1979 switch (rhs_code)
1981 case LROTATE_EXPR:
1982 case RROTATE_EXPR:
1983 break;
1984 default:
1985 return NULL;
1988 lhs = gimple_assign_lhs (last_stmt);
1989 oprnd0 = gimple_assign_rhs1 (last_stmt);
1990 type = TREE_TYPE (oprnd0);
1991 oprnd1 = gimple_assign_rhs2 (last_stmt);
1992 if (TREE_CODE (oprnd0) != SSA_NAME
1993 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1994 || !INTEGRAL_TYPE_P (type)
1995 || !TYPE_UNSIGNED (type))
1996 return NULL;
1998 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt))
1999 return NULL;
2001 if (dt != vect_internal_def
2002 && dt != vect_constant_def
2003 && dt != vect_external_def)
2004 return NULL;
2006 vectype = get_vectype_for_scalar_type (type);
2007 if (vectype == NULL_TREE)
2008 return NULL;
2010 /* If vector/vector or vector/scalar rotate is supported by the target,
2011 don't do anything here. */
2012 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2013 if (optab1
2014 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2015 return NULL;
2017 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2019 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2020 if (optab2
2021 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2022 return NULL;
2025 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2026 don't do anything here either. */
2027 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2028 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2029 if (!optab1
2030 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2031 || !optab2
2032 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2034 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2035 return NULL;
2036 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2037 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2038 if (!optab1
2039 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2040 || !optab2
2041 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2042 return NULL;
2045 *type_out = vectype;
2047 if (dt == vect_external_def
2048 && TREE_CODE (oprnd1) == SSA_NAME)
2049 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2051 def = NULL_TREE;
2052 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2053 if (TREE_CODE (oprnd1) == INTEGER_CST
2054 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2055 def = oprnd1;
2056 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2058 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2059 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2060 && TYPE_PRECISION (TREE_TYPE (rhs1))
2061 == TYPE_PRECISION (type))
2062 def = rhs1;
2065 if (def == NULL_TREE)
2067 def = vect_recog_temp_ssa_var (type, NULL);
2068 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2069 if (ext_def)
2071 basic_block new_bb
2072 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2073 gcc_assert (!new_bb);
2075 else
2076 append_pattern_def_seq (stmt_vinfo, def_stmt);
2078 stype = TREE_TYPE (def);
2079 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2081 if (TREE_CODE (def) == INTEGER_CST)
2083 if (!tree_fits_uhwi_p (def)
2084 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2085 || integer_zerop (def))
2086 return NULL;
2087 def2 = build_int_cst (stype,
2088 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2090 else
2092 tree vecstype = get_vectype_for_scalar_type (stype);
2093 stmt_vec_info def_stmt_vinfo;
2095 if (vecstype == NULL_TREE)
2096 return NULL;
2097 def2 = vect_recog_temp_ssa_var (stype, NULL);
2098 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2099 if (ext_def)
2101 basic_block new_bb
2102 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2103 gcc_assert (!new_bb);
2105 else
2107 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2108 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2109 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2110 append_pattern_def_seq (stmt_vinfo, def_stmt);
2113 def2 = vect_recog_temp_ssa_var (stype, NULL);
2114 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2115 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2116 gimple_assign_lhs (def_stmt), mask);
2117 if (ext_def)
2119 basic_block new_bb
2120 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2121 gcc_assert (!new_bb);
2123 else
2125 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2126 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2127 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2128 append_pattern_def_seq (stmt_vinfo, def_stmt);
2132 var1 = vect_recog_temp_ssa_var (type, NULL);
2133 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2134 ? LSHIFT_EXPR : RSHIFT_EXPR,
2135 oprnd0, def);
2136 append_pattern_def_seq (stmt_vinfo, def_stmt);
2138 var2 = vect_recog_temp_ssa_var (type, NULL);
2139 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2140 ? RSHIFT_EXPR : LSHIFT_EXPR,
2141 oprnd0, def2);
2142 append_pattern_def_seq (stmt_vinfo, def_stmt);
2144 /* Pattern detected. */
2145 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2147 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2148 var = vect_recog_temp_ssa_var (type, NULL);
2149 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2151 return pattern_stmt;
2154 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2155    vectorized:
2157 type a_t;
2158 TYPE b_T, res_T;
2160 S1 a_t = ;
2161 S2 b_T = ;
2162 S3 res_T = b_T op a_t;
2164    where type 'TYPE' is a type with a different size from 'type',
2165 and op is <<, >> or rotate.
2167 Also detect cases:
2169 type a_t;
2170 TYPE b_T, c_T, res_T;
2172 S0 c_T = ;
2173 S1 a_t = (type) c_T;
2174 S2 b_T = ;
2175 S3 res_T = b_T op a_t;
2177 Input/Output:
2179 * STMT_VINFO: The stmt from which the pattern search begins,
2180 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2181      with a shift/rotate which has the same type on both operands: in the
2182      second case just b_T op c_T, in the first case with an added cast
2183      from a_t to TYPE placed in STMT_VINFO_PATTERN_DEF_SEQ.
2185 Output:
2187 * TYPE_OUT: The type of the output of this pattern.
2189 * Return value: A new stmt that will be used to replace the shift/rotate
2190 S3 stmt. */
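/* An illustrative source fragment for the first case (editor's
   sketch; names are hypothetical):

     int x;
     unsigned char amnt;
     ...
     x = x << amnt;

   The shift amount has a narrower type than the shifted value, so a
   cast of amnt to int is added to the pattern def sequence and the
   shift itself is done with matching operand types.  */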
2192 static gimple *
2193 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2194 tree *type_out)
2196 gimple *last_stmt = stmt_vinfo->stmt;
2197 tree oprnd0, oprnd1, lhs, var;
2198 gimple *pattern_stmt;
2199 enum tree_code rhs_code;
2200 vec_info *vinfo = stmt_vinfo->vinfo;
2202 if (!is_gimple_assign (last_stmt))
2203 return NULL;
2205 rhs_code = gimple_assign_rhs_code (last_stmt);
2206 switch (rhs_code)
2208 case LSHIFT_EXPR:
2209 case RSHIFT_EXPR:
2210 case LROTATE_EXPR:
2211 case RROTATE_EXPR:
2212 break;
2213 default:
2214 return NULL;
2217 lhs = gimple_assign_lhs (last_stmt);
2218 oprnd0 = gimple_assign_rhs1 (last_stmt);
2219 oprnd1 = gimple_assign_rhs2 (last_stmt);
2220 if (TREE_CODE (oprnd0) != SSA_NAME
2221 || TREE_CODE (oprnd1) != SSA_NAME
2222 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2223 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2224 || TYPE_PRECISION (TREE_TYPE (lhs))
2225 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2226 return NULL;
2228 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2229 if (!def_vinfo)
2230 return NULL;
2232 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2233 if (*type_out == NULL_TREE)
2234 return NULL;
2236 tree def = NULL_TREE;
2237 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2238 if (def_stmt && gimple_assign_cast_p (def_stmt))
2240 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2241 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2242 && TYPE_PRECISION (TREE_TYPE (rhs1))
2243 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2245 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2246 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2247 def = rhs1;
2248 else
2250 tree mask
2251 = build_low_bits_mask (TREE_TYPE (rhs1),
2252 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2253 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2254 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2255 stmt_vec_info new_stmt_info
2256 = new_stmt_vec_info (def_stmt, vinfo);
2257 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2258 STMT_VINFO_VECTYPE (new_stmt_info)
2259 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2260 append_pattern_def_seq (stmt_vinfo, def_stmt);
2265 if (def == NULL_TREE)
2267 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2268 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2269 append_pattern_def_seq (stmt_vinfo, def_stmt);
2272 /* Pattern detected. */
2273 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2275 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2276 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2277 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2279 return pattern_stmt;
2282 /* Return true iff the target has a vector optab implementing the operation
2283 CODE on type VECTYPE. */
2285 static bool
2286 target_has_vecop_for_code (tree_code code, tree vectype)
2288 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2289 return voptab
2290 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2293 /* Verify that the target has optabs of VECTYPE to perform all the steps
2294 needed by the multiplication-by-immediate synthesis algorithm described by
2295    ALG and VAR.  If SYNTH_SHIFT_P is true, ensure that vector addition is
2296 present. Return true iff the target supports all the steps. */
2298 static bool
2299 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2300 tree vectype, bool synth_shift_p)
2302 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2303 return false;
2305 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2306 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2308 if (var == negate_variant
2309 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2310 return false;
2312 /* If we must synthesize shifts with additions make sure that vector
2313 addition is available. */
2314 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2315 return false;
2317 for (int i = 1; i < alg->ops; i++)
2319 switch (alg->op[i])
2321 case alg_shift:
2322 break;
2323 case alg_add_t_m2:
2324 case alg_add_t2_m:
2325 case alg_add_factor:
2326 if (!supports_vplus)
2327 return false;
2328 break;
2329 case alg_sub_t_m2:
2330 case alg_sub_t2_m:
2331 case alg_sub_factor:
2332 if (!supports_vminus)
2333 return false;
2334 break;
2335 case alg_unknown:
2336 case alg_m:
2337 case alg_zero:
2338 case alg_impossible:
2339 return false;
2340 default:
2341 gcc_unreachable ();
2345 return true;
2348 /* Synthesize a left shift of OP by AMNT bits using a series of additions,
2349    putting the final result in DEST.  Append all statements but the last to
2350    VINFO.  Return the last statement.  */
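/* For instance (editor's note, not from the original comment), a left
   shift by 3 becomes three self-additions:

     t1 = op + op;      // op << 1
     t2 = t1 + t1;      // op << 2
     dest = t2 + t2;    // op << 3  (the returned, unappended stmt)  */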
2352 static gimple *
2353 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2354 stmt_vec_info vinfo)
2356 HOST_WIDE_INT i;
2357 tree itype = TREE_TYPE (op);
2358 tree prev_res = op;
2359 gcc_assert (amnt >= 0);
2360 for (i = 0; i < amnt; i++)
2362 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2363 : dest;
2364 gimple *stmt
2365 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2366 prev_res = tmp_var;
2367 if (i < amnt - 1)
2368 append_pattern_def_seq (vinfo, stmt);
2369 else
2370 return stmt;
2372 gcc_unreachable ();
2373 return NULL;
2376 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2377 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2378 the process if necessary. Append the resulting assignment statements
2379 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2380    result of the binary operation.  If SYNTH_SHIFT_P is true, synthesize
2381 left shifts using additions. */
2383 static tree
2384 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2385 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2387 if (integer_zerop (op2)
2388 && (code == LSHIFT_EXPR
2389 || code == PLUS_EXPR))
2391 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2392 return op1;
2395 gimple *stmt;
2396 tree itype = TREE_TYPE (op1);
2397 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2399 if (code == LSHIFT_EXPR
2400 && synth_shift_p)
2402 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2403 stmt_vinfo);
2404 append_pattern_def_seq (stmt_vinfo, stmt);
2405 return tmp_var;
2408 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2409 append_pattern_def_seq (stmt_vinfo, stmt);
2410 return tmp_var;
2413 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2414 and simple arithmetic operations to be vectorized. Record the statements
2415 produced in STMT_VINFO and return the last statement in the sequence or
2416 NULL if it's not possible to synthesize such a multiplication.
2417 This function mirrors the behavior of expand_mult_const in expmed.c but
2418 works on tree-ssa form. */
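/* An illustrative synthesis (editor's sketch; the exact sequence is
   whatever choose_mult_variant selects): a multiplication by 10 might
   come out as

     t1 = op << 2;        // op * 4
     t2 = t1 + op;        // op * 5
     result = t2 << 1;    // op * 10

   i.e. an add step on a shifted accumulator followed by a final
   shift.  */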
2420 static gimple *
2421 vect_synth_mult_by_constant (tree op, tree val,
2422 stmt_vec_info stmt_vinfo)
2424 tree itype = TREE_TYPE (op);
2425 machine_mode mode = TYPE_MODE (itype);
2426 struct algorithm alg;
2427 mult_variant variant;
2428 if (!tree_fits_shwi_p (val))
2429 return NULL;
2431 /* Multiplication synthesis by shifts, adds and subs can introduce
2432 signed overflow where the original operation didn't. Perform the
2433 operations on an unsigned type and cast back to avoid this.
2434 In the future we may want to relax this for synthesis algorithms
2435 that we can prove do not cause unexpected overflow. */
2436 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2438 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2440 /* Targets that don't support vector shifts but support vector additions
2441 can synthesize shifts that way. */
2442 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2444 HOST_WIDE_INT hwval = tree_to_shwi (val);
2445 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2446 The vectorizer's benefit analysis will decide whether it's beneficial
2447 to do this. */
2448 bool possible = choose_mult_variant (mode, hwval, &alg,
2449 &variant, MAX_COST);
2450 if (!possible)
2451 return NULL;
2453 tree vectype = get_vectype_for_scalar_type (multtype);
2455 if (!vectype
2456 || !target_supports_mult_synth_alg (&alg, variant,
2457 vectype, synth_shift_p))
2458 return NULL;
2460 tree accumulator;
2462 /* Clear out the sequence of statements so we can populate it below. */
2463 gimple *stmt = NULL;
2465 if (cast_to_unsigned_p)
2467 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2468 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2469 append_pattern_def_seq (stmt_vinfo, stmt);
2470 op = tmp_op;
2473 if (alg.op[0] == alg_zero)
2474 accumulator = build_int_cst (multtype, 0);
2475 else
2476 accumulator = op;
2478 bool needs_fixup = (variant == negate_variant)
2479 || (variant == add_variant);
2481 for (int i = 1; i < alg.ops; i++)
2483 tree shft_log = build_int_cst (multtype, alg.log[i]);
2484 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2485 tree tmp_var = NULL_TREE;
2487 switch (alg.op[i])
2489 case alg_shift:
2490 if (synth_shift_p)
2491 stmt
2492 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2493 stmt_vinfo);
2494 else
2495 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2496 shft_log);
2497 break;
2498 case alg_add_t_m2:
2499 tmp_var
2500 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2501 stmt_vinfo, synth_shift_p);
2502 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2503 tmp_var);
2504 break;
2505 case alg_sub_t_m2:
2506 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2507 shft_log, stmt_vinfo,
2508 synth_shift_p);
2509 /* In some algorithms the first step involves zeroing the
2510         accumulator.  If subtracting from such an accumulator,
2511         just emit the negation directly.  */
2512 if (integer_zerop (accumulator))
2513 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2514 else
2515 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2516 tmp_var);
2517 break;
2518 case alg_add_t2_m:
2519 tmp_var
2520 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2521 stmt_vinfo, synth_shift_p);
2522 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2523 break;
2524 case alg_sub_t2_m:
2525 tmp_var
2526 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2527 stmt_vinfo, synth_shift_p);
2528 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2529 break;
2530 case alg_add_factor:
2531 tmp_var
2532 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2533 stmt_vinfo, synth_shift_p);
2534 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2535 tmp_var);
2536 break;
2537 case alg_sub_factor:
2538 tmp_var
2539 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2540 stmt_vinfo, synth_shift_p);
2541 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2542 accumulator);
2543 break;
2544 default:
2545 gcc_unreachable ();
2547 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2548 but rather return it directly. */
2550 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2551 append_pattern_def_seq (stmt_vinfo, stmt);
2552 accumulator = accum_tmp;
2554 if (variant == negate_variant)
2556 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2557 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2558 accumulator = accum_tmp;
2559 if (cast_to_unsigned_p)
2560 append_pattern_def_seq (stmt_vinfo, stmt);
2562 else if (variant == add_variant)
2564 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2565 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2566 accumulator = accum_tmp;
2567 if (cast_to_unsigned_p)
2568 append_pattern_def_seq (stmt_vinfo, stmt);
2570   /* Move back to a signed type if needed.  */
2571 if (cast_to_unsigned_p)
2573 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2574 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2577 return stmt;
2580 /* Detect multiplication by a constant and convert it into a sequence of
2581    shifts, additions, subtractions and negations.  We reuse the
2582    choose_mult_variant algorithm from expmed.c.
2584 Input/Output:
2586 STMT_VINFO: The stmt from which the pattern search begins,
2587 i.e. the mult stmt.
2589 Output:
2591 * TYPE_OUT: The type of the output of this pattern.
2593 * Return value: A new stmt that will be used to replace
2594 the multiplication. */
2596 static gimple *
2597 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2599 gimple *last_stmt = stmt_vinfo->stmt;
2600 tree oprnd0, oprnd1, vectype, itype;
2601 gimple *pattern_stmt;
2603 if (!is_gimple_assign (last_stmt))
2604 return NULL;
2606 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2607 return NULL;
2609 oprnd0 = gimple_assign_rhs1 (last_stmt);
2610 oprnd1 = gimple_assign_rhs2 (last_stmt);
2611 itype = TREE_TYPE (oprnd0);
2613 if (TREE_CODE (oprnd0) != SSA_NAME
2614 || TREE_CODE (oprnd1) != INTEGER_CST
2615 || !INTEGRAL_TYPE_P (itype)
2616 || !type_has_mode_precision_p (itype))
2617 return NULL;
2619 vectype = get_vectype_for_scalar_type (itype);
2620 if (vectype == NULL_TREE)
2621 return NULL;
2623 /* If the target can handle vectorized multiplication natively,
2624 don't attempt to optimize this. */
2625 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2626 if (mul_optab != unknown_optab)
2628 machine_mode vec_mode = TYPE_MODE (vectype);
2629 int icode = (int) optab_handler (mul_optab, vec_mode);
2630 if (icode != CODE_FOR_nothing)
2631 return NULL;
2634 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2635 if (!pattern_stmt)
2636 return NULL;
2638 /* Pattern detected. */
2639 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2641 *type_out = vectype;
2643 return pattern_stmt;
2646 /* Detect a signed division by a constant that wouldn't be
2647 otherwise vectorized:
2649 type a_t, b_t;
2651 S1 a_t = b_t / N;
2653 where type 'type' is an integral type and N is a constant.
2655 Similarly handle modulo by a constant:
2657 S4 a_t = b_t % N;
2659 Input/Output:
2661 * STMT_VINFO: The stmt from which the pattern search begins,
2662      i.e. the division stmt.  If N is a power of two constant and
2663      type is signed, S1 is replaced by:
2664 S3 y_t = b_t < 0 ? N - 1 : 0;
2665 S2 x_t = b_t + y_t;
2666 S1' a_t = x_t >> log2 (N);
2668    If N is a power of two constant and type is signed,
2669    S4 is replaced by (where *_T temporaries have unsigned type):
2670 S9 y_T = b_t < 0 ? -1U : 0U;
2671 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2672 S7 z_t = (type) z_T;
2673 S6 w_t = b_t + z_t;
2674 S5 x_t = w_t & (N - 1);
2675 S4' a_t = x_t - z_t;
2677 Output:
2679 * TYPE_OUT: The type of the output of this pattern.
2681 * Return value: A new stmt that will be used to replace the division
2682 S1 or modulo S4 stmt. */
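/* A worked instance of the division sequence above (editor's note),
   for signed int b_t and N == 4:

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   For b_t == -1 this yields y_t == 3, x_t == 2, a_t == 0, matching
   the truncating -1 / 4 == 0, whereas a plain arithmetic shift would
   give -1.  */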
2684 static gimple *
2685 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2687 gimple *last_stmt = stmt_vinfo->stmt;
2688 tree oprnd0, oprnd1, vectype, itype, cond;
2689 gimple *pattern_stmt, *def_stmt;
2690 enum tree_code rhs_code;
2691 vec_info *vinfo = stmt_vinfo->vinfo;
2692 optab optab;
2693 tree q;
2694 int dummy_int, prec;
2695 stmt_vec_info def_stmt_vinfo;
2697 if (!is_gimple_assign (last_stmt))
2698 return NULL;
2700 rhs_code = gimple_assign_rhs_code (last_stmt);
2701 switch (rhs_code)
2703 case TRUNC_DIV_EXPR:
2704 case TRUNC_MOD_EXPR:
2705 break;
2706 default:
2707 return NULL;
2710 oprnd0 = gimple_assign_rhs1 (last_stmt);
2711 oprnd1 = gimple_assign_rhs2 (last_stmt);
2712 itype = TREE_TYPE (oprnd0);
2713 if (TREE_CODE (oprnd0) != SSA_NAME
2714 || TREE_CODE (oprnd1) != INTEGER_CST
2715 || TREE_CODE (itype) != INTEGER_TYPE
2716 || !type_has_mode_precision_p (itype))
2717 return NULL;
2719 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2720 vectype = get_vectype_for_scalar_type (itype);
2721 if (vectype == NULL_TREE)
2722 return NULL;
2724 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2726 /* If the target can handle vectorized division or modulo natively,
2727 don't attempt to optimize this, since native division is likely
2728 to give smaller code. */
2729 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2730 if (optab != unknown_optab)
2732 machine_mode vec_mode = TYPE_MODE (vectype);
2733 int icode = (int) optab_handler (optab, vec_mode);
2734 if (icode != CODE_FOR_nothing)
2735 return NULL;
2739 prec = TYPE_PRECISION (itype);
2740 if (integer_pow2p (oprnd1))
2742 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2743 return NULL;
2745 /* Pattern detected. */
2746 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2748 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2749 build_int_cst (itype, 0));
2750 if (rhs_code == TRUNC_DIV_EXPR)
2752 tree var = vect_recog_temp_ssa_var (itype, NULL);
2753 tree shift;
2754 def_stmt
2755 = gimple_build_assign (var, COND_EXPR, cond,
2756 fold_build2 (MINUS_EXPR, itype, oprnd1,
2757 build_int_cst (itype, 1)),
2758 build_int_cst (itype, 0));
2759 append_pattern_def_seq (stmt_vinfo, def_stmt);
2760 var = vect_recog_temp_ssa_var (itype, NULL);
2761 def_stmt
2762 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2763 gimple_assign_lhs (def_stmt));
2764 append_pattern_def_seq (stmt_vinfo, def_stmt);
2766 shift = build_int_cst (itype, tree_log2 (oprnd1));
2767 pattern_stmt
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2769 RSHIFT_EXPR, var, shift);
2771 else
2773 tree signmask;
2774 if (compare_tree_int (oprnd1, 2) == 0)
2776 signmask = vect_recog_temp_ssa_var (itype, NULL);
2777 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2778 build_int_cst (itype, 1),
2779 build_int_cst (itype, 0));
2780 append_pattern_def_seq (stmt_vinfo, def_stmt);
2782 else
2784 tree utype
2785 = build_nonstandard_integer_type (prec, 1);
2786 tree vecutype = get_vectype_for_scalar_type (utype);
2787 tree shift
2788 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2789 - tree_log2 (oprnd1));
2790 tree var = vect_recog_temp_ssa_var (utype, NULL);
2792 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2793 build_int_cst (utype, -1),
2794 build_int_cst (utype, 0));
2795 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2796 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2797 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2798 append_pattern_def_seq (stmt_vinfo, def_stmt);
2799 var = vect_recog_temp_ssa_var (utype, NULL);
2800 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2801 gimple_assign_lhs (def_stmt),
2802 shift);
2803 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2804 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2805 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2806 append_pattern_def_seq (stmt_vinfo, def_stmt);
2807 signmask = vect_recog_temp_ssa_var (itype, NULL);
2808 def_stmt
2809 = gimple_build_assign (signmask, NOP_EXPR, var);
2810 append_pattern_def_seq (stmt_vinfo, def_stmt);
2812 def_stmt
2813 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2814 PLUS_EXPR, oprnd0, signmask);
2815 append_pattern_def_seq (stmt_vinfo, def_stmt);
2816 def_stmt
2817 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2818 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2819 fold_build2 (MINUS_EXPR, itype, oprnd1,
2820 build_int_cst (itype, 1)));
2821 append_pattern_def_seq (stmt_vinfo, def_stmt);
2823 pattern_stmt
2824 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2825 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2826 signmask);
2829 *type_out = vectype;
2830 return pattern_stmt;
2833 if (prec > HOST_BITS_PER_WIDE_INT
2834 || integer_zerop (oprnd1))
2835 return NULL;
2837 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2838 return NULL;
2840 if (TYPE_UNSIGNED (itype))
2842 unsigned HOST_WIDE_INT mh, ml;
2843 int pre_shift, post_shift;
2844 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2845 & GET_MODE_MASK (itype_mode));
2846 tree t1, t2, t3, t4;
2848 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2849 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2850 return NULL;
2852 /* Find a suitable multiplier and right shift count
2853         instead of dividing by D.  */
2854 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2856      /* If the suggested multiplier is more than PREC bits, we can do better
2857         for even divisors, using an initial right shift.  */
2858 if (mh != 0 && (d & 1) == 0)
2860 pre_shift = ctz_or_zero (d);
2861 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2862 &ml, &post_shift, &dummy_int);
2863 gcc_assert (!mh);
2865 else
2866 pre_shift = 0;
2868 if (mh != 0)
2870 if (post_shift - 1 >= prec)
2871 return NULL;
2873 /* t1 = oprnd0 h* ml;
2874 t2 = oprnd0 - t1;
2875 t3 = t2 >> 1;
2876 t4 = t1 + t3;
2877 q = t4 >> (post_shift - 1); */
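	  /* Editor's note, not from the original comment: a concrete
	     instance for prec == 32, d == 7 (assuming the usual
	     Granlund-Montgomery constants from choose_multiplier):
	     mh != 0, ml == 0x24924925, post_shift == 3, so

	       t1 = oprnd0 h* 0x24924925;
	       q = (t1 + ((oprnd0 - t1) >> 1)) >> 2;  */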
2878 t1 = vect_recog_temp_ssa_var (itype, NULL);
2879 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2880 build_int_cst (itype, ml));
2881 append_pattern_def_seq (stmt_vinfo, def_stmt);
2883 t2 = vect_recog_temp_ssa_var (itype, NULL);
2884 def_stmt
2885 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2886 append_pattern_def_seq (stmt_vinfo, def_stmt);
2888 t3 = vect_recog_temp_ssa_var (itype, NULL);
2889 def_stmt
2890 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2891 append_pattern_def_seq (stmt_vinfo, def_stmt);
2893 t4 = vect_recog_temp_ssa_var (itype, NULL);
2894 def_stmt
2895 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2897 if (post_shift != 1)
2899 append_pattern_def_seq (stmt_vinfo, def_stmt);
2901 q = vect_recog_temp_ssa_var (itype, NULL);
2902 pattern_stmt
2903 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2904 build_int_cst (itype, post_shift - 1));
2906 else
2908 q = t4;
2909 pattern_stmt = def_stmt;
2912 else
2914 if (pre_shift >= prec || post_shift >= prec)
2915 return NULL;
2917 /* t1 = oprnd0 >> pre_shift;
2918 t2 = t1 h* ml;
2919 q = t2 >> post_shift; */
2920 if (pre_shift)
2922 t1 = vect_recog_temp_ssa_var (itype, NULL);
2923 def_stmt
2924 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2925 build_int_cst (NULL, pre_shift));
2926 append_pattern_def_seq (stmt_vinfo, def_stmt);
2928 else
2929 t1 = oprnd0;
2931 t2 = vect_recog_temp_ssa_var (itype, NULL);
2932 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2933 build_int_cst (itype, ml));
2935 if (post_shift)
2937 append_pattern_def_seq (stmt_vinfo, def_stmt);
2939 q = vect_recog_temp_ssa_var (itype, NULL);
2940 def_stmt
2941 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2942 build_int_cst (itype, post_shift));
2944 else
2945 q = t2;
2947 pattern_stmt = def_stmt;
2950 else
2952 unsigned HOST_WIDE_INT ml;
2953 int post_shift;
2954 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2955 unsigned HOST_WIDE_INT abs_d;
2956 bool add = false;
2957 tree t1, t2, t3, t4;
2959 /* Give up for -1. */
2960 if (d == -1)
2961 return NULL;
2963 /* Since d might be INT_MIN, we have to cast to
2964 unsigned HOST_WIDE_INT before negating to avoid
2965 undefined signed overflow. */
2966 abs_d = (d >= 0
2967 ? (unsigned HOST_WIDE_INT) d
2968 : - (unsigned HOST_WIDE_INT) d);
2970 /* n rem d = n rem -d */
2971 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2973 d = abs_d;
2974 oprnd1 = build_int_cst (itype, abs_d);
2976 else if (HOST_BITS_PER_WIDE_INT >= prec
2977 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2978 /* This case is not handled correctly below. */
2979 return NULL;
2981 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2982 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2984 add = true;
2985 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2987 if (post_shift >= prec)
2988 return NULL;
2990 /* t1 = oprnd0 h* ml; */
2991 t1 = vect_recog_temp_ssa_var (itype, NULL);
2992 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2993 build_int_cst (itype, ml));
2995 if (add)
2997 /* t2 = t1 + oprnd0; */
2998 append_pattern_def_seq (stmt_vinfo, def_stmt);
2999 t2 = vect_recog_temp_ssa_var (itype, NULL);
3000 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3002 else
3003 t2 = t1;
3005 if (post_shift)
3007 /* t3 = t2 >> post_shift; */
3008 append_pattern_def_seq (stmt_vinfo, def_stmt);
3009 t3 = vect_recog_temp_ssa_var (itype, NULL);
3010 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3011 build_int_cst (itype, post_shift));
3013 else
3014 t3 = t2;
3016 wide_int oprnd0_min, oprnd0_max;
3017 int msb = 1;
3018 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3020 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3021 msb = 0;
3022 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3023 msb = -1;
3026 if (msb == 0 && d >= 0)
3028 /* q = t3; */
3029 q = t3;
3030 pattern_stmt = def_stmt;
3032 else
3034 /* t4 = oprnd0 >> (prec - 1);
3035 or if we know from VRP that oprnd0 >= 0
3036 t4 = 0;
3037 or if we know from VRP that oprnd0 < 0
3038 t4 = -1; */
3039 append_pattern_def_seq (stmt_vinfo, def_stmt);
3040 t4 = vect_recog_temp_ssa_var (itype, NULL);
3041 if (msb != 1)
3042 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3043 build_int_cst (itype, msb));
3044 else
3045 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3046 build_int_cst (itype, prec - 1));
3047 append_pattern_def_seq (stmt_vinfo, def_stmt);
3049 /* q = t3 - t4; or q = t4 - t3; */
3050 q = vect_recog_temp_ssa_var (itype, NULL);
3051 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3052 d < 0 ? t3 : t4);
3056 if (rhs_code == TRUNC_MOD_EXPR)
3058 tree r, t1;
3060 /* We divided. Now finish by:
3061 t1 = q * oprnd1;
3062 r = oprnd0 - t1; */
3063 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3065 t1 = vect_recog_temp_ssa_var (itype, NULL);
3066 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3067 append_pattern_def_seq (stmt_vinfo, def_stmt);
3069 r = vect_recog_temp_ssa_var (itype, NULL);
3070 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3073 /* Pattern detected. */
3074 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3076 *type_out = vectype;
3077 return pattern_stmt;
3080 /* Function vect_recog_mixed_size_cond_pattern
3082 Try to find the following pattern:
3084 type x_t, y_t;
3085 TYPE a_T, b_T, c_T;
3086 loop:
3087 S1 a_T = x_t CMP y_t ? b_T : c_T;
3089    where type 'TYPE' is an integral type which has a different size
3090 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3091 than 'type', the constants need to fit into an integer type
3092 with the same width as 'type') or results of conversion from 'type'.
3094 Input:
3096 * STMT_VINFO: The stmt from which the pattern search begins.
3098 Output:
3100 * TYPE_OUT: The type of the output of this pattern.
3102 * Return value: A new stmt that will be used to replace the pattern.
3103 Additionally a def_stmt is added.
3105 a_it = x_t CMP y_t ? b_it : c_it;
3106 a_T = (TYPE) a_it; */
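/* An illustrative source fragment (editor's sketch):

     short x, y;
     int a = x < y ? 17 : 42;

   The comparison works on 16-bit elements but the result is 32-bit;
   the pattern computes the COND_EXPR in a 16-bit temporary first:

     short a_it = x < y ? 17 : 42;
     int a = (int) a_it;  */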
3108 static gimple *
3109 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3111 gimple *last_stmt = stmt_vinfo->stmt;
3112 tree cond_expr, then_clause, else_clause;
3113 stmt_vec_info def_stmt_info;
3114 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3115 gimple *pattern_stmt, *def_stmt;
3116 vec_info *vinfo = stmt_vinfo->vinfo;
3117 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3118 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3119 bool promotion;
3120 tree comp_scalar_type;
3122 if (!is_gimple_assign (last_stmt)
3123 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3124 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3125 return NULL;
3127 cond_expr = gimple_assign_rhs1 (last_stmt);
3128 then_clause = gimple_assign_rhs2 (last_stmt);
3129 else_clause = gimple_assign_rhs3 (last_stmt);
3131 if (!COMPARISON_CLASS_P (cond_expr))
3132 return NULL;
3134 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3135 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3136 if (comp_vectype == NULL_TREE)
3137 return NULL;
3139 type = gimple_expr_type (last_stmt);
3140 if (types_compatible_p (type, comp_scalar_type)
3141 || ((TREE_CODE (then_clause) != INTEGER_CST
3142 || TREE_CODE (else_clause) != INTEGER_CST)
3143 && !INTEGRAL_TYPE_P (comp_scalar_type))
3144 || !INTEGRAL_TYPE_P (type))
3145 return NULL;
3147 if ((TREE_CODE (then_clause) != INTEGER_CST
3148 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3149 &def_stmt0, &promotion))
3150 || (TREE_CODE (else_clause) != INTEGER_CST
3151 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3152 &def_stmt1, &promotion)))
3153 return NULL;
3155 if (orig_type0 && orig_type1
3156 && !types_compatible_p (orig_type0, orig_type1))
3157 return NULL;
3159 if (orig_type0)
3161 if (!types_compatible_p (orig_type0, comp_scalar_type))
3162 return NULL;
3163 then_clause = gimple_assign_rhs1 (def_stmt0);
3164 itype = orig_type0;
3167 if (orig_type1)
3169 if (!types_compatible_p (orig_type1, comp_scalar_type))
3170 return NULL;
3171 else_clause = gimple_assign_rhs1 (def_stmt1);
3172 itype = orig_type1;
3176 HOST_WIDE_INT cmp_mode_size
3177 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3179 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3180 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3181 return NULL;
3183 vectype = get_vectype_for_scalar_type (type);
3184 if (vectype == NULL_TREE)
3185 return NULL;
3187 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3188 return NULL;
3190 if (itype == NULL_TREE)
3191 itype = build_nonstandard_integer_type (cmp_mode_size,
3192 TYPE_UNSIGNED (type));
3194 if (itype == NULL_TREE
3195 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3196 return NULL;
3198 vecitype = get_vectype_for_scalar_type (itype);
3199 if (vecitype == NULL_TREE)
3200 return NULL;
3202 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3203 return NULL;
3205 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3207 if ((TREE_CODE (then_clause) == INTEGER_CST
3208 && !int_fits_type_p (then_clause, itype))
3209 || (TREE_CODE (else_clause) == INTEGER_CST
3210 && !int_fits_type_p (else_clause, itype)))
3211 return NULL;
3214 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3215 COND_EXPR, unshare_expr (cond_expr),
3216 fold_convert (itype, then_clause),
3217 fold_convert (itype, else_clause));
3218 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3219 NOP_EXPR, gimple_assign_lhs (def_stmt));
3221 append_pattern_def_seq (stmt_vinfo, def_stmt);
3222 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3223 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3224 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3225 *type_out = vectype;
3227 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3229 return pattern_stmt;
3233 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3234 true if bool VAR can and should be optimized that way. Assume it shouldn't
3235 in case it's a result of a comparison which can be directly vectorized into
3236 a vector comparison. Fills in STMTS with all stmts visited during the
3237 walk. */
3239 static bool
3240 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3242 tree rhs1;
3243 enum tree_code rhs_code;
3245 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3246 if (!def_stmt_info)
3247 return false;
3249 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3250 if (!def_stmt)
3251 return false;
3253 if (stmts.contains (def_stmt))
3254 return true;
3256 rhs1 = gimple_assign_rhs1 (def_stmt);
3257 rhs_code = gimple_assign_rhs_code (def_stmt);
3258 switch (rhs_code)
3260 case SSA_NAME:
3261 if (! check_bool_pattern (rhs1, vinfo, stmts))
3262 return false;
3263 break;
3265 CASE_CONVERT:
3266 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3267 return false;
3268 if (! check_bool_pattern (rhs1, vinfo, stmts))
3269 return false;
3270 break;
3272 case BIT_NOT_EXPR:
3273 if (! check_bool_pattern (rhs1, vinfo, stmts))
3274 return false;
3275 break;
3277 case BIT_AND_EXPR:
3278 case BIT_IOR_EXPR:
3279 case BIT_XOR_EXPR:
3280 if (! check_bool_pattern (rhs1, vinfo, stmts)
3281 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3282 return false;
3283 break;
3285 default:
3286 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3288 tree vecitype, comp_vectype;
3290 /* If the comparison can throw, then is_gimple_condexpr will be
3291 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3292 if (stmt_could_throw_p (def_stmt))
3293 return false;
3295 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3296 if (comp_vectype == NULL_TREE)
3297 return false;
3299 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3300 if (mask_type
3301 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3302 return false;
3304 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3306 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3307 tree itype
3308 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3309 vecitype = get_vectype_for_scalar_type (itype);
3310 if (vecitype == NULL_TREE)
3311 return false;
3313 else
3314 vecitype = comp_vectype;
3315 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3316 return false;
3318 else
3319 return false;
3320 break;
3323 bool res = stmts.add (def_stmt);
3324 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3325 gcc_assert (!res);
3327 return true;
3331 /* Helper function of adjust_bool_pattern.  Cast VAR to TYPE, append
3332    the cast stmt to STMT_INFO's pattern definition sequence, and
3333    return the lhs of the cast.  */
3335 static tree
3336 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3338 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3339 NOP_EXPR, var);
3340 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3341 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3342 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3343 append_pattern_def_seq (stmt_info, cast_stmt);
3344 return gimple_assign_lhs (cast_stmt);
3347 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3348 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3349 type, OUT_TYPE is the desired final integer type of the whole pattern.
3350    STMT_INFO is the info of the pattern root, with which the pattern stmts
3351    should be associated.  DEFS is a map of pattern defs.  */
3353 static void
3354 adjust_bool_pattern (tree var, tree out_type,
3355 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3357 gimple *stmt = SSA_NAME_DEF_STMT (var);
3358 enum tree_code rhs_code, def_rhs_code;
3359 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3360 location_t loc;
3361 gimple *pattern_stmt, *def_stmt;
3362 tree trueval = NULL_TREE;
3364 rhs1 = gimple_assign_rhs1 (stmt);
3365 rhs2 = gimple_assign_rhs2 (stmt);
3366 rhs_code = gimple_assign_rhs_code (stmt);
3367 loc = gimple_location (stmt);
3368 switch (rhs_code)
3370 case SSA_NAME:
3371 CASE_CONVERT:
3372 irhs1 = *defs.get (rhs1);
3373 itype = TREE_TYPE (irhs1);
3374 pattern_stmt
3375 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3376 SSA_NAME, irhs1);
3377 break;
3379 case BIT_NOT_EXPR:
3380 irhs1 = *defs.get (rhs1);
3381 itype = TREE_TYPE (irhs1);
3382 pattern_stmt
3383 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3384 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3385 break;
3387 case BIT_AND_EXPR:
3388 /* Try to optimize x = y & (a < b ? 1 : 0); into
3389 x = (a < b ? y : 0);
3391 E.g. for:
3392 bool a_b, b_b, c_b;
3393 TYPE d_T;
3395 S1 a_b = x1 CMP1 y1;
3396 S2 b_b = x2 CMP2 y2;
3397 S3 c_b = a_b & b_b;
3398 S4 d_T = (TYPE) c_b;
3400 we would normally emit:
3402 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3403 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3404 S3' c_T = a_T & b_T;
3405 S4' d_T = c_T;
3407 but we can save one stmt by using the
3408 result of one of the COND_EXPRs in the other COND_EXPR and leave
3409 BIT_AND_EXPR stmt out:
3411 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3412 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3413 	 S4'  d_T = c_T;
3415 	 At least when VEC_COND_EXPR is implemented using masks,
3416 	 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3417 	 computes the comparison mask and ANDs it, in one case with an
3418 	 all-ones vector, in the other case with a vector register.
3419 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3420 	 often more expensive.  */
3421 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3422 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3423 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3425 irhs1 = *defs.get (rhs1);
3426 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3427 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3428 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3430 rhs_code = def_rhs_code;
3431 rhs1 = def_rhs1;
3432 rhs2 = gimple_assign_rhs2 (def_stmt);
3433 trueval = irhs1;
3434 goto do_compare;
3436 else
3437 irhs2 = *defs.get (rhs2);
3438 goto and_ior_xor;
3440 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3441 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3442 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3444 irhs2 = *defs.get (rhs2);
3445 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3446 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3447 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3449 rhs_code = def_rhs_code;
3450 rhs1 = def_rhs1;
3451 rhs2 = gimple_assign_rhs2 (def_stmt);
3452 trueval = irhs2;
3453 goto do_compare;
3455 else
3456 irhs1 = *defs.get (rhs1);
3457 goto and_ior_xor;
3459 /* FALLTHRU */
3460 case BIT_IOR_EXPR:
3461 case BIT_XOR_EXPR:
3462 irhs1 = *defs.get (rhs1);
3463 irhs2 = *defs.get (rhs2);
3464 and_ior_xor:
3465 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3466 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3468 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3469 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3470 int out_prec = TYPE_PRECISION (out_type);
3471 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3472 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3473 stmt_info);
3474 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3475 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3476 stmt_info);
3477 else
3479 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3480 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3483 itype = TREE_TYPE (irhs1);
3484 pattern_stmt
3485 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3486 rhs_code, irhs1, irhs2);
3487 break;
3489 default:
3490 do_compare:
3491 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3492 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3493 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3494 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3495 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3497 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3498 itype
3499 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3501 else
3502 itype = TREE_TYPE (rhs1);
3503 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3504 if (trueval == NULL_TREE)
3505 trueval = build_int_cst (itype, 1);
3506 else
3507 gcc_checking_assert (useless_type_conversion_p (itype,
3508 TREE_TYPE (trueval)));
3509 pattern_stmt
3510 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3511 COND_EXPR, cond_expr, trueval,
3512 build_int_cst (itype, 0));
3513 break;
3516 gimple_set_location (pattern_stmt, loc);
3517 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3518 pattern def seq stmts instead of just letting auto-detection do
3519 its work? */
3520 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3521 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3522 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3523 append_pattern_def_seq (stmt_info, pattern_stmt);
3524 defs.put (var, gimple_assign_lhs (pattern_stmt));
3527 /* Comparison function to qsort a vector of gimple stmts by UID.  */
3529 static int
3530 sort_after_uid (const void *p1, const void *p2)
3532 const gimple *stmt1 = *(const gimple * const *)p1;
3533 const gimple *stmt2 = *(const gimple * const *)p2;
3534 return gimple_uid (stmt1) - gimple_uid (stmt2);
3537 /* Create pattern stmts for all stmts participating in the bool pattern
3538 specified by BOOL_STMT_SET and its root STMT with the desired type
3539 OUT_TYPE. Return the def of the pattern root. */
3541 static tree
3542 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3543 tree out_type, gimple *stmt)
3545 /* Gather original stmts in the bool pattern in their order of appearance
3546 in the IL. */
3547 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3548 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3549 i != bool_stmt_set.end (); ++i)
3550 bool_stmts.quick_push (*i);
3551 bool_stmts.qsort (sort_after_uid);
3553 /* Now process them in that order, producing pattern stmts. */
3554 hash_map <tree, tree> defs;
3555 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3556 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3557 out_type, vinfo_for_stmt (stmt), defs);
3559 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3560 gimple *pattern_stmt
3561 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3562 return gimple_assign_lhs (pattern_stmt);
3565 /* Helper for search_type_for_mask. */
3567 static tree
3568 search_type_for_mask_1 (tree var, vec_info *vinfo,
3569 hash_map<gimple *, tree> &cache)
3571 tree rhs1;
3572 enum tree_code rhs_code;
3573 tree res = NULL_TREE, res2;
3575 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3576 return NULL_TREE;
3578 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3579 if (!def_stmt_info)
3580 return NULL_TREE;
3582 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3583 if (!def_stmt)
3584 return NULL_TREE;
3586 tree *c = cache.get (def_stmt);
3587 if (c)
3588 return *c;
3590 rhs_code = gimple_assign_rhs_code (def_stmt);
3591 rhs1 = gimple_assign_rhs1 (def_stmt);
3593 switch (rhs_code)
3595 case SSA_NAME:
3596 case BIT_NOT_EXPR:
3597 CASE_CONVERT:
3598 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3599 break;
3601 case BIT_AND_EXPR:
3602 case BIT_IOR_EXPR:
3603 case BIT_XOR_EXPR:
3604 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3605 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3606 cache);
3607 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3608 res = res2;
3609 break;
3611 default:
3612 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3614 tree comp_vectype, mask_type;
3616 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3618 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3619 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3620 vinfo, cache);
3621 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3622 res = res2;
3623 break;
3626 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3627 if (comp_vectype == NULL_TREE)
3629 res = NULL_TREE;
3630 break;
3633 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3634 if (!mask_type
3635 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3637 res = NULL_TREE;
3638 break;
3641 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3642 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3644 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3645 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3647 else
3648 res = TREE_TYPE (rhs1);
3652 cache.put (def_stmt, res);
3653 return res;
3656 /* Return the proper type for converting bool VAR into
3657 an integer value or NULL_TREE if no such type exists.
3658 The type is chosen so that the converted value has the
3659 same number of elements as VAR's vector type. */
3661 static tree
3662 search_type_for_mask (tree var, vec_info *vinfo)
3664 hash_map<gimple *, tree> cache;
3665 return search_type_for_mask_1 (var, vinfo, cache);
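/* An illustrative note, not part of the original file: given

     bool b_1 = x_2 < y_3;   (x_2 and y_3 of type int)

   search_type_for_mask (b_1, vinfo) would return a 32-bit unsigned
   integer type on targets that support the comparison, so the mask
   computed from b_1 can live in a vector with the same number of
   elements as a vector of the compared ints.  */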
3668 /* Function vect_recog_bool_pattern
3670 Try to find pattern like following:
3672 bool a_b, b_b, c_b, d_b, e_b;
3673 TYPE f_T;
3674 loop:
3675 S1 a_b = x1 CMP1 y1;
3676 S2 b_b = x2 CMP2 y2;
3677 S3 c_b = a_b & b_b;
3678 S4 d_b = x3 CMP3 y3;
3679 S5 e_b = c_b | d_b;
3680 S6 f_T = (TYPE) e_b;
3682 where type 'TYPE' is an integral type. Or a similar pattern
3683 ending in
3685 S6 f_Y = e_b ? r_Y : s_Y;
3687 as results from if-conversion of a complex condition.
3689 Input:
3691 * STMT_VINFO: The stmt at the end of the pattern, from which
3692 the search begins, i.e. the cast of a bool to
3693 an integer type.
3695 Output:
3697 * TYPE_OUT: The type of the output of this pattern.
3699 * Return value: A new stmt that will be used to replace the pattern.
3701 Assuming the size of TYPE is the same as the size of all comparisons
3702 (otherwise some casts would be added where needed), for the above
3703 sequence we create the following related pattern stmts:
3704 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3705 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3706 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3707 S5' e_T = c_T | d_T;
3708 S6' f_T = e_T;
3710 Instead of the above S3' we could emit:
3711 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3712 S3' c_T = a_T | b_T;
3713 but the above is more efficient. */
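/* An illustrative example (hypothetical, not from the original file):
   a source loop whose if-converted body produces the bool pattern
   documented above -- two comparisons combined with & and |, feeding
   a COND_EXPR.  */

static void
example_bool_pattern (int *restrict out, const int *restrict x,
                      const int *restrict y, int n)
{
  for (int i = 0; i < n; ++i)
    /* After if-conversion: (x[i] > 0 & y[i] < 16) | (x[i] == y[i])
       selects between 1 and 2.  */
    out[i] = ((x[i] > 0 && y[i] < 16) || x[i] == y[i]) ? 1 : 2;
}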
3715 static gimple *
3716 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3718 gimple *last_stmt = stmt_vinfo->stmt;
3719 enum tree_code rhs_code;
3720 tree var, lhs, rhs, vectype;
3721 stmt_vec_info new_stmt_info;
3722 vec_info *vinfo = stmt_vinfo->vinfo;
3723 gimple *pattern_stmt;
3725 if (!is_gimple_assign (last_stmt))
3726 return NULL;
3728 var = gimple_assign_rhs1 (last_stmt);
3729 lhs = gimple_assign_lhs (last_stmt);
3731 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3732 return NULL;
3734 hash_set<gimple *> bool_stmts;
3736 rhs_code = gimple_assign_rhs_code (last_stmt);
3737 if (CONVERT_EXPR_CODE_P (rhs_code))
3739 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3740 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3741 return NULL;
3742 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3743 if (vectype == NULL_TREE)
3744 return NULL;
3746 if (check_bool_pattern (var, vinfo, bool_stmts))
3748 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3749 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3750 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3751 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3752 else
3753 pattern_stmt
3754 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3756 else
3758 tree type = search_type_for_mask (var, vinfo);
3759 tree cst0, cst1, tmp;
3761 if (!type)
3762 return NULL;
3764 /* If the condition type can be narrowed to match the result type,
3765 use the narrowed type directly: a single COND_EXPR on the packed
3766 mask avoids emitting multiple COND_EXPRs followed by result
3767 packing. In the widening case it is better to emit the COND_EXPR
3768 first and then extract the results. */
3769 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3770 type = TREE_TYPE (lhs);
3772 cst0 = build_int_cst (type, 0);
3773 cst1 = build_int_cst (type, 1);
3774 tmp = vect_recog_temp_ssa_var (type, NULL);
3775 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3777 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3779 tree new_vectype = get_vectype_for_scalar_type (type);
3780 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3781 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3782 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3783 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3785 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3786 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3790 *type_out = vectype;
3791 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3793 return pattern_stmt;
3795 else if (rhs_code == COND_EXPR
3796 && TREE_CODE (var) == SSA_NAME)
3798 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3799 if (vectype == NULL_TREE)
3800 return NULL;
3802 /* Build a scalar type for the boolean result that when
3803 vectorized matches the vector type of the result in
3804 size and number of elements. */
3805 unsigned prec
3806 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3807 TYPE_VECTOR_SUBPARTS (vectype));
3809 tree type
3810 = build_nonstandard_integer_type (prec,
3811 TYPE_UNSIGNED (TREE_TYPE (var)));
3812 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3813 return NULL;
3815 if (!check_bool_pattern (var, vinfo, bool_stmts))
3816 return NULL;
3818 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3820 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3821 pattern_stmt
3822 = gimple_build_assign (lhs, COND_EXPR,
3823 build2 (NE_EXPR, boolean_type_node,
3824 rhs, build_int_cst (type, 0)),
3825 gimple_assign_rhs2 (last_stmt),
3826 gimple_assign_rhs3 (last_stmt));
3827 *type_out = vectype;
3828 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3830 return pattern_stmt;
3832 else if (rhs_code == SSA_NAME
3833 && STMT_VINFO_DATA_REF (stmt_vinfo))
3835 stmt_vec_info pattern_stmt_info;
3836 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3837 gcc_assert (vectype != NULL_TREE);
3838 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3839 return NULL;
3841 if (check_bool_pattern (var, vinfo, bool_stmts))
3842 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3843 else
3845 tree type = search_type_for_mask (var, vinfo);
3846 tree cst0, cst1, new_vectype;
3848 if (!type)
3849 return NULL;
3851 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3852 type = TREE_TYPE (vectype);
3854 cst0 = build_int_cst (type, 0);
3855 cst1 = build_int_cst (type, 1);
3856 new_vectype = get_vectype_for_scalar_type (type);
3858 rhs = vect_recog_temp_ssa_var (type, NULL);
3859 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3861 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3862 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3863 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3864 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3867 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3868 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3870 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3871 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3872 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3873 rhs = rhs2;
3875 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3876 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3877 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3878 STMT_VINFO_DATA_REF (pattern_stmt_info)
3879 = STMT_VINFO_DATA_REF (stmt_vinfo);
3880 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3881 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3882 *type_out = vectype;
3883 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3885 return pattern_stmt;
3887 else
3888 return NULL;
3892 /* A helper for vect_recog_mask_conversion_pattern. Build
3893 conversion of MASK to a type suitable for masking VECTYPE.
3894 The built statement is given the required vectype and is appended to
3895 STMT_VINFO's pattern definition sequence.
3897 Return the converted mask. */
3899 static tree
3900 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3901 vec_info *vinfo)
3903 gimple *stmt;
3904 tree masktype, tmp;
3905 stmt_vec_info new_stmt_info;
3907 masktype = build_same_sized_truth_vector_type (vectype);
3908 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3909 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3910 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3911 set_vinfo_for_stmt (stmt, new_stmt_info);
3912 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3913 append_pattern_def_seq (stmt_vinfo, stmt);
3915 return tmp;
3919 /* Function vect_recog_mask_conversion_pattern
3921 Try to find statements which require boolean type
3922 conversion. Additional conversion statements are
3923 added to handle such cases. For example:
3925 bool m_1, m_2, m_3;
3926 int i_4, i_5;
3927 double d_6, d_7;
3928 char c_1, c_2, c_3;
3930 S1 m_1 = i_4 > i_5;
3931 S2 m_2 = d_6 < d_7;
3932 S3 m_3 = m_1 & m_2;
3933 S4 c_1 = m_3 ? c_2 : c_3;
3935 Will be transformed into:
3937 S1 m_1 = i_4 > i_5;
3938 S2 m_2 = d_6 < d_7;
3939 S3'' m_2' = (_Bool[bitsize=32])m_2
3940 S3' m_3' = m_1 & m_2';
3941 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3942 S4' c_1' = m_3'' ? c_2 : c_3; */
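/* An illustrative example (hypothetical, not from the original file):
   source code for the transformation sketched above.  The int
   comparison produces a mask with 32-bit elements and the double
   comparison one with 64-bit elements, so conversion stmts are needed
   before the & and again before the char-sized COND_EXPR.  */

static void
example_mask_conversion (char *restrict c, const int *restrict i1,
                         const int *restrict i2,
                         const double *restrict d1,
                         const double *restrict d2,
                         char a, char b, int n)
{
  for (int k = 0; k < n; ++k)
    c[k] = (i1[k] > i2[k] && d1[k] < d2[k]) ? a : b;
}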
3944 static gimple *
3945 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3947 gimple *last_stmt = stmt_vinfo->stmt;
3948 enum tree_code rhs_code;
3949 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3950 tree vectype1, vectype2;
3951 stmt_vec_info pattern_stmt_info;
3952 vec_info *vinfo = stmt_vinfo->vinfo;
3954 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3955 if (is_gimple_call (last_stmt)
3956 && gimple_call_internal_p (last_stmt)
3957 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3958 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3960 gcall *pattern_stmt;
3961 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3963 if (load)
3965 lhs = gimple_call_lhs (last_stmt);
3966 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3968 else
3970 rhs2 = gimple_call_arg (last_stmt, 3);
3971 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3974 rhs1 = gimple_call_arg (last_stmt, 2);
3975 rhs1_type = search_type_for_mask (rhs1, vinfo);
3976 if (!rhs1_type)
3977 return NULL;
3978 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3980 if (!vectype1 || !vectype2
3981 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3982 TYPE_VECTOR_SUBPARTS (vectype2)))
3983 return NULL;
3985 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3987 if (load)
3989 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3990 pattern_stmt
3991 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3992 gimple_call_arg (last_stmt, 0),
3993 gimple_call_arg (last_stmt, 1),
3994 tmp);
3995 gimple_call_set_lhs (pattern_stmt, lhs);
3997 else
3998 pattern_stmt
3999 = gimple_build_call_internal (IFN_MASK_STORE, 4,
4000 gimple_call_arg (last_stmt, 0),
4001 gimple_call_arg (last_stmt, 1),
4002 tmp,
4003 gimple_call_arg (last_stmt, 3));
4005 gimple_call_set_nothrow (pattern_stmt, true);
4007 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4008 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4009 STMT_VINFO_DATA_REF (pattern_stmt_info)
4010 = STMT_VINFO_DATA_REF (stmt_vinfo);
4011 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4012 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4013 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4014 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
4016 *type_out = vectype1;
4017 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4019 return pattern_stmt;
4022 if (!is_gimple_assign (last_stmt))
4023 return NULL;
4025 gimple *pattern_stmt;
4026 lhs = gimple_assign_lhs (last_stmt);
4027 rhs1 = gimple_assign_rhs1 (last_stmt);
4028 rhs_code = gimple_assign_rhs_code (last_stmt);
4030 /* Check for cond expression requiring mask conversion. */
4031 if (rhs_code == COND_EXPR)
4033 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4035 if (TREE_CODE (rhs1) == SSA_NAME)
4037 rhs1_type = search_type_for_mask (rhs1, vinfo);
4038 if (!rhs1_type)
4039 return NULL;
4041 else if (COMPARISON_CLASS_P (rhs1))
4043 /* Check whether we're comparing scalar booleans and (if so)
4044 whether a better mask type exists than the mask associated
4045 with boolean-sized elements. This avoids unnecessary packs
4046 and unpacks if the booleans are set from comparisons of
4047 wider types. E.g. in:
4049 int x1, x2, x3, x4, y1, y2;
4051 bool b1 = (x1 == x2);
4052 bool b2 = (x3 == x4);
4053 ... = b1 == b2 ? y1 : y2;
4055 it is better for b1 and b2 to use the mask type associated
4056 with int elements rather than bool (byte) elements. */
4057 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4058 if (!rhs1_type)
4059 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4061 else
4062 return NULL;
4064 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4066 if (!vectype1 || !vectype2)
4067 return NULL;
4069 /* Continue if a conversion is needed. Also continue if we have
4070 a comparison whose vector type would normally be different from
4071 VECTYPE2 when considered in isolation. In that case we'll
4072 replace the comparison with an SSA name (so that we can record
4073 its vector type) and behave as though the comparison was an SSA
4074 name from the outset. */
4075 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4076 TYPE_VECTOR_SUBPARTS (vectype2))
4077 && (TREE_CODE (rhs1) == SSA_NAME
4078 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4079 return NULL;
4081 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4082 in place, we can handle it in vectorizable_condition. This avoids
4083 unnecessary promotion stmts and increased vectorization factor. */
4084 if (COMPARISON_CLASS_P (rhs1)
4085 && INTEGRAL_TYPE_P (rhs1_type)
4086 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4087 TYPE_VECTOR_SUBPARTS (vectype2)))
4089 enum vect_def_type dt;
4090 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4091 && dt == vect_external_def
4092 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4093 && (dt == vect_external_def
4094 || dt == vect_constant_def))
4096 tree wide_scalar_type = build_nonstandard_integer_type
4097 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4098 TYPE_UNSIGNED (rhs1_type));
4099 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4100 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4101 return NULL;
4105 /* If rhs1 is a comparison we need to move it into a
4106 separate statement. */
4107 if (TREE_CODE (rhs1) != SSA_NAME)
4109 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4110 pattern_stmt = gimple_build_assign (tmp, rhs1);
4111 rhs1 = tmp;
4113 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4114 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4115 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4116 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4119 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4120 TYPE_VECTOR_SUBPARTS (vectype2)))
4121 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4122 else
4123 tmp = rhs1;
4125 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4126 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4127 gimple_assign_rhs2 (last_stmt),
4128 gimple_assign_rhs3 (last_stmt));
4130 *type_out = vectype1;
4131 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4133 return pattern_stmt;
4136 /* Now check for binary boolean operations requiring conversion for
4137 one of the operands. */
4138 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4139 return NULL;
4141 if (rhs_code != BIT_IOR_EXPR
4142 && rhs_code != BIT_XOR_EXPR
4143 && rhs_code != BIT_AND_EXPR
4144 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4145 return NULL;
4147 rhs2 = gimple_assign_rhs2 (last_stmt);
4149 rhs1_type = search_type_for_mask (rhs1, vinfo);
4150 rhs2_type = search_type_for_mask (rhs2, vinfo);
4152 if (!rhs1_type || !rhs2_type
4153 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4154 return NULL;
4156 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4158 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4159 if (!vectype1)
4160 return NULL;
4161 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4163 else
4165 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4166 if (!vectype1)
4167 return NULL;
4168 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4171 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4172 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4174 *type_out = vectype1;
4175 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4177 return pattern_stmt;
4180 /* STMT is a load or store. If the load or store is conditional, return
4181 the boolean condition under which it occurs, otherwise return null. */
4183 static tree
4184 vect_get_load_store_mask (gimple *stmt)
4186 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4188 gcc_assert (gimple_assign_single_p (def_assign));
4189 return NULL_TREE;
4192 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4194 internal_fn ifn = gimple_call_internal_fn (def_call);
4195 int mask_index = internal_fn_mask_index (ifn);
4196 return gimple_call_arg (def_call, mask_index);
4199 gcc_unreachable ();
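/* Illustrative GIMPLE shapes (an assumption, not from the original
   file) showing where the mask argument sits in the two masked
   internal functions handled above:

     lhs_1 = .MASK_LOAD (ptr_2, align_3, mask_4);
     .MASK_STORE (ptr_2, align_3, mask_4, val_5);

   In both cases internal_fn_mask_index returns 2, which matches the
   gimple_call_arg (last_stmt, 2) uses elsewhere in this file.  */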
4202 /* Return the scalar offset type that an internal gather/scatter function
4203 should use. GS_INFO describes the gather/scatter operation. */
4205 static tree
4206 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4208 tree offset_type = TREE_TYPE (gs_info->offset);
4209 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4211 /* Enforced by vect_check_gather_scatter. */
4212 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4213 gcc_assert (element_bits >= offset_bits);
4215 /* If the offset is narrower than the elements, extend it according
4216 to its sign. */
4217 if (element_bits > offset_bits)
4218 return build_nonstandard_integer_type (element_bits,
4219 TYPE_UNSIGNED (offset_type));
4221 return offset_type;
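/* An illustrative example (hypothetical, not from the original file):
   a gather load whose 32-bit signed offsets must be extended to the
   64-bit element width chosen by the function above.  */

static void
example_gather (double *restrict out, const double *restrict base,
                const int *restrict idx, int n)
{
  for (int i = 0; i < n; ++i)
    out[i] = base[idx[i]];  /* idx[i] is sign-extended to 64 bits.  */
}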
4224 /* Return MASK if MASK is suitable for masking an operation on vectors
4225 of type VECTYPE, otherwise convert it into such a form and return
4226 the result. Associate any conversion statements with STMT_INFO's
4227 pattern. */
4229 static tree
4230 vect_convert_mask_for_vectype (tree mask, tree vectype,
4231 stmt_vec_info stmt_info, vec_info *vinfo)
4233 tree mask_type = search_type_for_mask (mask, vinfo);
4234 if (mask_type)
4236 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4237 if (mask_vectype
4238 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4239 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4240 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4242 return mask;
4245 /* Return the equivalent of:
4247 fold_convert (TYPE, VALUE)
4249 with the expectation that the operation will be vectorized.
4250 If new statements are needed, add them as pattern statements
4251 to STMT_INFO. */
4253 static tree
4254 vect_add_conversion_to_pattern (tree type, tree value,
4255 stmt_vec_info stmt_info,
4256 vec_info *vinfo)
4258 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4259 return value;
4261 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4262 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4263 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4264 set_vinfo_for_stmt (conversion, new_stmt_info);
4265 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4266 append_pattern_def_seq (stmt_info, conversion);
4267 return new_value;
4270 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4271 internal function. Return the final statement on success and set
4272 *TYPE_OUT to the vector type being loaded or stored.
4274 This function only handles gathers and scatters that were recognized
4275 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4277 static gimple *
4278 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4280 /* Currently we only support this for loop vectorization. */
4281 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4282 if (!loop_vinfo)
4283 return NULL;
4285 /* Make sure that we're looking at a gather load or scatter store. */
4286 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4287 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4288 return NULL;
4290 /* Get the boolean that controls whether the load or store happens.
4291 This is null if the operation is unconditional. */
4292 gimple *stmt = stmt_info->stmt;
4293 tree mask = vect_get_load_store_mask (stmt);
4295 /* Make sure that the target supports an appropriate internal
4296 function for the gather/scatter operation. */
4297 gather_scatter_info gs_info;
4298 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4299 || gs_info.decl)
4300 return NULL;
4302 /* Convert the mask to the right form. */
4303 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4304 if (mask)
4305 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4306 loop_vinfo);
4308 /* Get the invariant base and non-invariant offset, converting the
4309 latter to the same width as the vector elements. */
4310 tree base = gs_info.base;
4311 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4312 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4313 stmt_info, loop_vinfo);
4315 /* Build the new pattern statement. */
4316 tree scale = size_int (gs_info.scale);
4317 gcall *pattern_stmt;
4318 if (DR_IS_READ (dr))
4320 if (mask != NULL)
4321 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4322 offset, scale, mask);
4323 else
4324 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4325 offset, scale);
4326 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4327 gimple_call_set_lhs (pattern_stmt, load_lhs);
4329 else
4331 tree rhs = vect_get_store_rhs (stmt);
4332 if (mask != NULL)
4333 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4334 base, offset, scale, rhs,
4335 mask);
4336 else
4337 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4338 base, offset, scale, rhs);
4340 gimple_call_set_nothrow (pattern_stmt, true);
4342 /* Copy across relevant vectorization info and associate DR with the
4343 new pattern statement instead of the original statement. */
4344 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4345 loop_vinfo);
4346 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4347 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4348 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4349 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4350 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4351 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4353 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4354 *type_out = vectype;
4355 vect_pattern_detected ("gather/scatter pattern", stmt);
4357 return pattern_stmt;
4360 /* Return true if TYPE is a non-boolean integer type. These are the types
4361 that we want to consider for narrowing. */
4363 static bool
4364 vect_narrowable_type_p (tree type)
4366 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4369 /* Return true if the operation given by CODE can be truncated to N bits
4370 when only N bits of the output are needed. This is only true if bit N+1
4371 of the inputs has no effect on the low N bits of the result. */
4373 static bool
4374 vect_truncatable_operation_p (tree_code code)
4376 switch (code)
4378 case PLUS_EXPR:
4379 case MINUS_EXPR:
4380 case MULT_EXPR:
4381 case BIT_AND_EXPR:
4382 case BIT_IOR_EXPR:
4383 case BIT_XOR_EXPR:
4384 case COND_EXPR:
4385 return true;
4387 default:
4388 return false;
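/* A worked example of the distinction (illustrative, not from the
   original file).  If only the low 8 bits of the result are needed:

     (a + b) & 0xff          bit 8 of A and B cannot affect the low
                             8 bits of the sum, so PLUS_EXPR qualifies;
     ((a + b) >> 1) & 0xff   bit 8 of A + B shifts down into bit 7,
                             so RSHIFT_EXPR does not.  */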
4392 /* Record that STMT_INFO could be changed from operating on TYPE to
4393 operating on a type with the precision and sign given by PRECISION
4394 and SIGN respectively. PRECISION is an arbitrary bit precision;
4395 it might not be a whole number of bytes. */
4397 static void
4398 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4399 unsigned int precision, signop sign)
4401 /* Round the precision up to a whole number of bytes. */
4402 precision = vect_element_precision (precision);
4403 if (precision < TYPE_PRECISION (type)
4404 && (!stmt_info->operation_precision
4405 || stmt_info->operation_precision > precision))
4407 stmt_info->operation_precision = precision;
4408 stmt_info->operation_sign = sign;
4412 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4413 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4414 is an arbitrary bit precision; it might not be a whole number of bytes. */
4416 static void
4417 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4418 unsigned int min_input_precision)
4420 /* This operation in isolation only requires the inputs to have
4421 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4422 that MIN_INPUT_PRECISION is a natural precision for the chain
4423 as a whole. E.g. consider something like:
4425 unsigned short *x, *y;
4426 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4428 The right shift can be done on unsigned chars, and only requires the
4429 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4430 approach would mean turning a natural chain of single-vector unsigned
4431 short operations into one that truncates "*x" and then extends
4432 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4433 operation and one vector for each unsigned char operation.
4434 This would be a significant pessimization.
4436 Instead only propagate the maximum of this precision and the precision
4437 required by the users of the result. This means that we don't pessimize
4438 the case above but continue to optimize things like:
4440 unsigned char *y;
4441 unsigned short *x;
4442 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4444 Here we would truncate two vectors of *x to a single vector of
4445 unsigned chars and use single-vector unsigned char operations for
4446 everything else, rather than doing two unsigned short copies of
4447 "(*x & 0xf0) >> 4" and then truncating the result. */
4448 min_input_precision = MAX (min_input_precision,
4449 stmt_info->min_output_precision);
4451 if (min_input_precision < TYPE_PRECISION (type)
4452 && (!stmt_info->min_input_precision
4453 || stmt_info->min_input_precision > min_input_precision))
4454 stmt_info->min_input_precision = min_input_precision;
4457 /* Subroutine of vect_determine_min_output_precision. Return true if
4458 we can calculate a reduced number of output bits for STMT_INFO,
4459 whose result is LHS. */
4461 static bool
4462 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4464 /* Take the maximum precision required by users of the result. */
4465 unsigned int precision = 0;
4466 imm_use_iterator iter;
4467 use_operand_p use;
4468 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4470 gimple *use_stmt = USE_STMT (use);
4471 if (is_gimple_debug (use_stmt))
4472 continue;
4473 if (!vect_stmt_in_region_p (stmt_info->vinfo, use_stmt))
4474 return false;
4475 stmt_vec_info use_stmt_info = vinfo_for_stmt (use_stmt);
4476 if (!use_stmt_info->min_input_precision)
4477 return false;
4478 precision = MAX (precision, use_stmt_info->min_input_precision);
4481 if (dump_enabled_p ())
4483 dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
4484 precision);
4485 dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
4486 dump_printf (MSG_NOTE, " are significant\n");
4488 stmt_info->min_output_precision = precision;
4489 return true;
4492 /* Calculate min_output_precision for STMT_INFO. */
4494 static void
4495 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4497 /* We're only interested in statements with a narrowable result. */
4498 tree lhs = gimple_get_lhs (stmt_info->stmt);
4499 if (!lhs
4500 || TREE_CODE (lhs) != SSA_NAME
4501 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4502 return;
4504 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4505 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4508 /* Use range information to decide whether STMT (described by STMT_INFO)
4509 could be done in a narrower type. This is effectively a forward
4510 propagation, since it uses context-independent information that applies
4511 to all users of an SSA name. */
4513 static void
4514 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4516 tree lhs = gimple_assign_lhs (stmt);
4517 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4518 return;
4520 tree type = TREE_TYPE (lhs);
4521 if (!vect_narrowable_type_p (type))
4522 return;
4524 /* First see whether we have any useful range information for the result. */
4525 unsigned int precision = TYPE_PRECISION (type);
4526 signop sign = TYPE_SIGN (type);
4527 wide_int min_value, max_value;
4528 if (!vect_get_range_info (lhs, &min_value, &max_value))
4529 return;
4531 tree_code code = gimple_assign_rhs_code (stmt);
4532 unsigned int nops = gimple_num_ops (stmt);
4534 if (!vect_truncatable_operation_p (code))
4535 /* Check that all relevant input operands are compatible, and update
4536 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4537 for (unsigned int i = 1; i < nops; ++i)
4539 tree op = gimple_op (stmt, i);
4540 if (TREE_CODE (op) == INTEGER_CST)
4542 /* Don't require the integer to have TYPE (which it might
4543 not for things like shift amounts, etc.), but do require it
4544 to fit the type. */
4545 if (!int_fits_type_p (op, type))
4546 return;
4548 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4549 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4551 else if (TREE_CODE (op) == SSA_NAME)
4553 /* Ignore codes that don't take uniform arguments. */
4554 if (!types_compatible_p (TREE_TYPE (op), type))
4555 return;
4557 wide_int op_min_value, op_max_value;
4558 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4559 return;
4561 min_value = wi::min (min_value, op_min_value, sign);
4562 max_value = wi::max (max_value, op_max_value, sign);
4564 else
4565 return;
4568 /* Try to switch signed types for unsigned types if we can.
4569 This is better for two reasons. First, unsigned ops tend
4570 to be cheaper than signed ops. Second, it means that we can
4571 handle things like:
4573 signed char c;
4574 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4576 as
4578 signed char c;
4579 unsigned short res_1 = (unsigned short) c & 0xff00;
4580 int res = (int) res_1;
4582 where the intermediate result res_1 has unsigned rather than
4583 signed type. */
4584 if (sign == SIGNED && !wi::neg_p (min_value))
4585 sign = UNSIGNED;
4587 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4588 unsigned int precision1 = wi::min_precision (min_value, sign);
4589 unsigned int precision2 = wi::min_precision (max_value, sign);
4590 unsigned int value_precision = MAX (precision1, precision2);
4591 if (value_precision >= precision)
4592 return;
4594 if (dump_enabled_p ())
4596 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4597 " without loss of precision: ",
4598 sign == SIGNED ? "signed" : "unsigned",
4599 value_precision);
4600 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4603 vect_set_operation_type (stmt_info, type, value_precision, sign);
4604 vect_set_min_input_precision (stmt_info, type, value_precision);
4607 /* Use information about the users of STMT's result to decide whether
4608 STMT (described by STMT_INFO) could be done in a narrower type.
4609 This is effectively a backward propagation. */
4611 static void
4612 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4614 tree_code code = gimple_assign_rhs_code (stmt);
4615 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4616 tree type = TREE_TYPE (gimple_op (stmt, opno));
4617 if (!vect_narrowable_type_p (type))
4618 return;
4620 unsigned int precision = TYPE_PRECISION (type);
4621 unsigned int operation_precision, min_input_precision;
4622 switch (code)
4624 CASE_CONVERT:
4625 /* Only the bits that contribute to the output matter. Don't change
4626 the precision of the operation itself. */
4627 operation_precision = precision;
4628 min_input_precision = stmt_info->min_output_precision;
4629 break;
4631 case LSHIFT_EXPR:
4632 case RSHIFT_EXPR:
4634 tree shift = gimple_assign_rhs2 (stmt);
4635 if (TREE_CODE (shift) != INTEGER_CST
4636 || !wi::ltu_p (wi::to_widest (shift), precision))
4637 return;
4638 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4639 if (code == LSHIFT_EXPR)
4641 /* We need CONST_SHIFT fewer bits of the input. */
4642 operation_precision = stmt_info->min_output_precision;
4643 min_input_precision = (MAX (operation_precision, const_shift)
4644 - const_shift);
4646 else
4648 /* We need CONST_SHIFT extra bits to do the operation. */
4649 operation_precision = (stmt_info->min_output_precision
4650 + const_shift);
4651 min_input_precision = operation_precision;
4653 break;
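/* A worked example (illustrative, not from the original file): for
   y = x >> 3 where only the low 8 bits of y are needed, the shift
   must be done in 8 + 3 = 11 bits, which vect_element_precision
   rounds up to 16; for y = x << 3 only MAX (8, 3) - 3 = 5 input
   bits are needed.  */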
4656 default:
4657 if (vect_truncatable_operation_p (code))
4659 /* Input bit N has no effect on output bits N-1 and lower. */
4660 operation_precision = stmt_info->min_output_precision;
4661 min_input_precision = operation_precision;
4662 break;
4664 return;
4667 if (operation_precision < precision)
4669 if (dump_enabled_p ())
4671 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4672 " without affecting users: ",
4673 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4674 operation_precision);
4675 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4677 vect_set_operation_type (stmt_info, type, operation_precision,
4678 TYPE_SIGN (type));
4680 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4683 /* Handle vect_determine_precisions for STMT_INFO, given that we
4684 have already done so for the users of its result. */
4686 void
4687 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4689 vect_determine_min_output_precision (stmt_info);
4690 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4692 vect_determine_precisions_from_range (stmt_info, stmt);
4693 vect_determine_precisions_from_users (stmt_info, stmt);
4697 /* Walk backwards through the vectorizable region to determine the
4698 values of these fields:
4700 - min_output_precision
4701 - min_input_precision
4702 - operation_precision
4703 - operation_sign. */
4705 void
4706 vect_determine_precisions (vec_info *vinfo)
4708 DUMP_VECT_SCOPE ("vect_determine_precisions");
4710 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4712 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4713 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4714 unsigned int nbbs = loop->num_nodes;
4716 for (unsigned int i = 0; i < nbbs; i++)
4718 basic_block bb = bbs[nbbs - i - 1];
4719 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4720 !gsi_end_p (si); gsi_prev (&si))
4721 vect_determine_stmt_precisions (vinfo_for_stmt (gsi_stmt (si)));
4724 else
4726 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4727 gimple_stmt_iterator si = bb_vinfo->region_end;
4728 gimple *stmt;
4729 do
4731 if (!gsi_stmt (si))
4732 si = gsi_last_bb (bb_vinfo->bb);
4733 else
4734 gsi_prev (&si);
4735 stmt = gsi_stmt (si);
4736 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4737 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4738 vect_determine_stmt_precisions (stmt_info);
4740 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4744 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4746 struct vect_recog_func
4748 vect_recog_func_ptr fn;
4749 const char *name;
4752 /* Note that ordering matters - the first pattern matching on a stmt is
4753 taken, which means that usually the more complex patterns need to precede
4754 the less complex ones (widen_sum only after dot_prod or sad, for example). */
4755 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4756 { vect_recog_over_widening_pattern, "over_widening" },
4757 /* Must come after over_widening, which narrows the shift as much as
4758 possible beforehand. */
4759 { vect_recog_average_pattern, "average" },
4760 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4761 { vect_recog_widen_mult_pattern, "widen_mult" },
4762 { vect_recog_dot_prod_pattern, "dot_prod" },
4763 { vect_recog_sad_pattern, "sad" },
4764 { vect_recog_widen_sum_pattern, "widen_sum" },
4765 { vect_recog_pow_pattern, "pow" },
4766 { vect_recog_widen_shift_pattern, "widen_shift" },
4767 { vect_recog_rotate_pattern, "rotate" },
4768 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4769 { vect_recog_divmod_pattern, "divmod" },
4770 { vect_recog_mult_pattern, "mult" },
4771 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4772 { vect_recog_bool_pattern, "bool" },
4773 /* This must come before mask conversion, and includes the parts
4774 of mask conversion that are needed for gather and scatter
4775 internal functions. */
4776 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4777 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4780 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4782 /* Mark statements that are involved in a pattern. */
4784 static inline void
4785 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4786 tree pattern_vectype)
4788 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4789 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4791 bool old_pattern_p = is_pattern_stmt_p (orig_stmt_info);
4792 if (old_pattern_p)
4794 /* We're replacing a statement in an existing pattern definition
4795 sequence. */
4796 if (dump_enabled_p ())
4798 dump_printf_loc (MSG_NOTE, vect_location,
4799 "replacing earlier pattern ");
4800 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_stmt, 0);
4803 /* To keep the book-keeping simple, just swap the lhs of the
4804 old and new statements, so that the old one has a valid but
4805 unused lhs. */
4806 tree old_lhs = gimple_get_lhs (orig_stmt);
4807 gimple_set_lhs (orig_stmt, gimple_get_lhs (pattern_stmt));
4808 gimple_set_lhs (pattern_stmt, old_lhs);
4810 if (dump_enabled_p ())
4812 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4813 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4816 /* Switch to the statement that ORIG replaces. */
4817 orig_stmt_info
4818 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (orig_stmt_info));
4820 /* We shouldn't be replacing the main pattern statement. */
4821 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) != orig_stmt);
4824 if (def_seq)
4825 for (gimple_stmt_iterator si = gsi_start (def_seq);
4826 !gsi_end_p (si); gsi_next (&si))
4827 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4829 if (old_pattern_p)
4831 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4833 /* Insert all the new pattern statements before the original one. */
4834 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4835 gimple_stmt_iterator gsi = gsi_for_stmt (orig_stmt, orig_def_seq);
4836 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4837 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4839 /* Remove the pattern statement that this new pattern replaces. */
4840 gsi_remove (&gsi, false);
4842 else
4843 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4846 /* Function vect_pattern_recog_1
4848 Input:
4849 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4850 computation pattern.
4851 STMT: A stmt from which the pattern search should start.
4853 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4854 a sequence of statements that has the same functionality and can be
4855 used to replace STMT. It returns the last statement in the sequence
4856 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4857 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4858 statement, having first checked that the target supports the new operation
4859 in that type.
4861 This function also does some bookkeeping, as explained in the documentation
4862 for vect_pattern_recog. */
4864 static void
4865 vect_pattern_recog_1 (vect_recog_func *recog_func, gimple_stmt_iterator si)
4867 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4868 stmt_vec_info stmt_info;
4869 loop_vec_info loop_vinfo;
4870 tree pattern_vectype;
4872 /* If this statement has already been replaced with pattern statements,
4873 leave the original statement alone, since the first match wins.
4874 Instead try to match against the definition statements that feed
4875 the main pattern statement. */
4876 stmt_info = vinfo_for_stmt (stmt);
4877 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4879 gimple_stmt_iterator gsi;
4880 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4881 !gsi_end_p (gsi); gsi_next (&gsi))
4882 vect_pattern_recog_1 (recog_func, gsi);
4883 return;
4886 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4887 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4888 if (!pattern_stmt)
4890 /* Clear any half-formed pattern definition sequence. */
4891 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4892 return;
4895 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4896 gcc_assert (pattern_vectype);
4898 /* Found a vectorizable pattern. */
4899 if (dump_enabled_p ())
4901 dump_printf_loc (MSG_NOTE, vect_location,
4902 "%s pattern recognized: ", recog_func->name);
4903 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4906 /* Mark the stmts that are involved in the pattern. */
4907 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4909 /* Patterns cannot be vectorized using SLP, because they change the order of
4910 computation. */
4911 if (loop_vinfo)
4913 unsigned ix, ix2;
4914 gimple **elem_ptr;
4915 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4916 elem_ptr, *elem_ptr == stmt);
4921 /* Function vect_pattern_recog
4923 Input:
4924 VINFO - a vec_info for the loop or basic-block region in which we want
4925 to look for computation idioms.
4927 Output - for each computation idiom that is detected we create a new stmt
4928 that provides the same functionality and that can be vectorized. We
4929 also record some information in the stmt_vec_info of the relevant
4930 stmts, as explained below:
4932 At the entry to this function we have the following stmts, with the
4933 following initial value in the STMT_VINFO fields:
4935 stmt in_pattern_p related_stmt vec_stmt
4936 S1: a_i = .... - - -
4937 S2: a_2 = ..use(a_i).. - - -
4938 S3: a_1 = ..use(a_2).. - - -
4939 S4: a_0 = ..use(a_1).. - - -
4940 S5: ... = ..use(a_0).. - - -
4942 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4943 represented by a single stmt. We then:
4944 - create a new stmt S6 equivalent to the pattern (the stmt is not
4945 inserted into the code)
4946 - fill in the STMT_VINFO fields as follows:
4948 in_pattern_p related_stmt vec_stmt
4949 S1: a_i = .... - - -
4950 S2: a_2 = ..use(a_i).. - - -
4951 S3: a_1 = ..use(a_2).. - - -
4952 S4: a_0 = ..use(a_1).. true S6 -
4953 '---> S6: a_new = .... - S4 -
4954 S5: ... = ..use(a_0).. - - -
4956 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4957 to each other through the RELATED_STMT field).
4959 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4960 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4961 remain irrelevant unless used by stmts other than S4.
4963 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4964 (because they are marked as irrelevant). It will vectorize S6, and record
4965 a pointer to the new vector stmt VS6 from S6 (as usual).
4966 S4 will be skipped, and S5 will be vectorized as usual:
4968 in_pattern_p related_stmt vec_stmt
4969 S1: a_i = .... - - -
4970 S2: a_2 = ..use(a_i).. - - -
4971 S3: a_1 = ..use(a_2).. - - -
4972 > VS6: va_new = .... - - -
4973 S4: a_0 = ..use(a_1).. true S6 VS6
4974 '---> S6: a_new = .... - S4 VS6
4975 > VS5: ... = ..vuse(va_new).. - - -
4976 S5: ... = ..use(a_0).. - - -
4978 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4979 elsewhere), and we'll end up with:
4981 VS6: va_new = ....
4982 VS5: ... = ..vuse(va_new)..
4984 In case of more than one pattern statements, e.g., widen-mult with
4985 intermediate type:
4987 S1 a_t = ;
4988 S2 a_T = (TYPE) a_t;
4989 '--> S3: a_it = (interm_type) a_t;
4990 S4 prod_T = a_T * CONST;
4991 '--> S5: prod_T' = a_it w* CONST;
4993 there may be other users of a_T outside the pattern. In that case S2 will
4994 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4995 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4996 be recorded in S3. */
4998 void
4999 vect_pattern_recog (vec_info *vinfo)
5001 struct loop *loop;
5002 basic_block *bbs;
5003 unsigned int nbbs;
5004 gimple_stmt_iterator si;
5005 unsigned int i, j;
5007 vect_determine_precisions (vinfo);
5009 DUMP_VECT_SCOPE ("vect_pattern_recog");
5011 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5013 loop = LOOP_VINFO_LOOP (loop_vinfo);
5014 bbs = LOOP_VINFO_BBS (loop_vinfo);
5015 nbbs = loop->num_nodes;
5017 /* Scan through the loop stmts, applying the pattern recognition
5018 functions starting at each stmt visited: */
5019 for (i = 0; i < nbbs; i++)
5021 basic_block bb = bbs[i];
5022 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5023 /* Scan over all generic vect_recog_xxx_pattern functions. */
5024 for (j = 0; j < NUM_PATTERNS; j++)
5025 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si);
5028 else
5030 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5031 for (si = bb_vinfo->region_begin;
5032 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5034 gimple *stmt = gsi_stmt (si);
5035 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5036 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5037 continue;
5039 /* Scan over all generic vect_recog_xxx_pattern functions. */
5040 for (j = 0; j < NUM_PATTERNS; j++)
5041 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si);