[19/46] Make vect_dr_stmt return a stmt_vec_info
[official-gcc.git] / gcc / tree-vect-patterns.c
blobe24ff5f6be5b1f15683a3816ee60523f0f888d94
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
56 value_range_type vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
71 return true;
73 else
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
80 return false;
84 /* Report that we've found an instance of pattern PATTERN in
85 statement STMT. */
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
93 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
98 return the pattern statement's stmt_vec_info. Set its vector type to
99 VECTYPE if it doesn't have one already. */
101 static stmt_vec_info
102 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
103 tree vectype)
105 vec_info *vinfo = orig_stmt_info->vinfo;
106 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
107 if (pattern_stmt_info == NULL_STMT_VEC_INFO)
108 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
109 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
111 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
112 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
113 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
114 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
115 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
116 return pattern_stmt_info;
119 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
120 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
121 have one already. */
123 static void
124 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
125 tree vectype)
127 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
128 STMT_VINFO_RELATED_STMT (orig_stmt_info)
129 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
132 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
133 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
134 be different from the vector type of the final pattern statement. */
136 static inline void
137 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
138 tree vectype = NULL_TREE)
140 vec_info *vinfo = stmt_info->vinfo;
141 if (vectype)
143 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
144 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
146 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
147 new_stmt);
150 /* The caller wants to perform new operations on vect_external variable
151 VAR, so that the result of the operations would also be vect_external.
152 Return the edge on which the operations can be performed, if one exists.
153 Return null if the operations should instead be treated as part of
154 the pattern that needs them. */
156 static edge
157 vect_get_external_def_edge (vec_info *vinfo, tree var)
159 edge e = NULL;
160 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
162 e = loop_preheader_edge (loop_vinfo->loop);
163 if (!SSA_NAME_IS_DEFAULT_DEF (var))
165 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
166 if (bb == NULL
167 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
168 e = NULL;
171 return e;
174 /* Return true if the target supports a vector version of CODE,
175 where CODE is known to map to a direct optab. ITYPE specifies
176 the type of (some of) the scalar inputs and OTYPE specifies the
177 type of the scalar result.
179 If CODE allows the inputs and outputs to have different type
180 (such as for WIDEN_SUM_EXPR), it is the input mode rather
181 than the output mode that determines the appropriate target pattern.
182 Operand 0 of the target pattern then specifies the mode that the output
183 must have.
185 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
186 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
187 is nonnull. */
189 static bool
190 vect_supportable_direct_optab_p (tree otype, tree_code code,
191 tree itype, tree *vecotype_out,
192 tree *vecitype_out = NULL)
194 tree vecitype = get_vectype_for_scalar_type (itype);
195 if (!vecitype)
196 return false;
198 tree vecotype = get_vectype_for_scalar_type (otype);
199 if (!vecotype)
200 return false;
202 optab optab = optab_for_tree_code (code, vecitype, optab_default);
203 if (!optab)
204 return false;
206 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
207 if (icode == CODE_FOR_nothing
208 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
209 return false;
211 *vecotype_out = vecotype;
212 if (vecitype_out)
213 *vecitype_out = vecitype;
214 return true;
217 /* Round bit precision PRECISION up to a full element. */
219 static unsigned int
220 vect_element_precision (unsigned int precision)
222 precision = 1 << ceil_log2 (precision);
223 return MAX (precision, BITS_PER_UNIT);
226 /* If OP is defined by a statement that's being considered for vectorization,
227 return information about that statement, otherwise return NULL. */
229 static stmt_vec_info
230 vect_get_internal_def (vec_info *vinfo, tree op)
232 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
233 if (def_stmt_info
234 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
235 return def_stmt_info;
236 return NULL;
239 /* Check whether NAME, an ssa-name used in USE_STMT,
240 is a result of a type promotion, such that:
241 DEF_STMT: NAME = NOP (name0)
242 If CHECK_SIGN is TRUE, check that either both types are signed or both are
243 unsigned. */
245 static bool
246 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
247 tree *orig_type, gimple **def_stmt, bool *promotion)
249 stmt_vec_info stmt_vinfo;
250 tree type = TREE_TYPE (name);
251 tree oprnd0;
252 enum vect_def_type dt;
254 stmt_vinfo = vinfo_for_stmt (use_stmt);
255 stmt_vec_info def_stmt_info;
256 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
257 def_stmt))
258 return false;
260 if (dt != vect_internal_def
261 && dt != vect_external_def && dt != vect_constant_def)
262 return false;
264 if (!*def_stmt)
265 return false;
267 if (!is_gimple_assign (*def_stmt))
268 return false;
270 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
271 return false;
273 oprnd0 = gimple_assign_rhs1 (*def_stmt);
275 *orig_type = TREE_TYPE (oprnd0);
276 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
277 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
278 return false;
280 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
281 *promotion = true;
282 else
283 *promotion = false;
285 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
286 return false;
288 return true;
291 /* Holds information about an input operand after some sign changes
292 and type promotions have been peeled away. */
293 struct vect_unpromoted_value {
294 vect_unpromoted_value ();
296 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
298 /* The value obtained after peeling away zero or more casts. */
299 tree op;
301 /* The type of OP. */
302 tree type;
304 /* The definition type of OP. */
305 vect_def_type dt;
307 /* If OP is the result of peeling at least one cast, and if the cast
308 of OP itself is a vectorizable statement, CASTER identifies that
309 statement, otherwise it is null. */
310 stmt_vec_info caster;
313 inline vect_unpromoted_value::vect_unpromoted_value ()
314 : op (NULL_TREE),
315 type (NULL_TREE),
316 dt (vect_uninitialized_def),
317 caster (NULL)
321 /* Set the operand to OP_IN, its definition type to DT_IN, and the
322 statement that casts it to CASTER_IN. */
324 inline void
325 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
326 stmt_vec_info caster_in)
328 op = op_in;
329 type = TREE_TYPE (op);
330 dt = dt_in;
331 caster = caster_in;
334 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
335 to reach some vectorizable inner operand OP', continuing as long as it
336 is possible to convert OP' back to OP using a possible sign change
337 followed by a possible promotion P. Return this OP', or null if OP is
338 not a vectorizable SSA name. If there is a promotion P, describe its
339 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
340 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
341 have more than one user.
343 A successful return means that it is possible to go from OP' to OP
344 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
345 whereas the cast from UNPROM to OP might be a promotion, a sign
346 change, or a nop.
348 E.g. say we have:
350 signed short *ptr = ...;
351 signed short C = *ptr;
352 unsigned short B = (unsigned short) C; // sign change
353 signed int A = (signed int) B; // unsigned promotion
354 ...possible other uses of A...
355 unsigned int OP = (unsigned int) A; // sign change
357 In this case it's possible to go directly from C to OP using:
359 OP = (unsigned int) (unsigned short) C;
360 +------------+ +--------------+
361 promotion sign change
363 so OP' would be C. The input to the promotion is B, so UNPROM
364 would describe B. */
366 static tree
367 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
368 vect_unpromoted_value *unprom,
369 bool *single_use_p = NULL)
371 tree res = NULL_TREE;
372 tree op_type = TREE_TYPE (op);
373 unsigned int orig_precision = TYPE_PRECISION (op_type);
374 stmt_vec_info caster = NULL;
375 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
377 /* See whether OP is simple enough to vectorize. */
378 stmt_vec_info def_stmt_info;
379 gimple *def_stmt;
380 vect_def_type dt;
381 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
382 break;
384 /* If OP is the input of a demotion, skip over it to see whether
385 OP is itself the result of a promotion. If so, the combined
386 effect of the promotion and the demotion might fit the required
387 pattern, otherwise neither operation fits.
389 This copes with cases such as the result of an arithmetic
390 operation being truncated before being stored, and where that
391 arithmetic operation has been recognized as an over-widened one. */
392 if (TYPE_PRECISION (op_type) <= orig_precision)
394 /* Use OP as the UNPROM described above if we haven't yet
395 found a promotion, or if using the new input preserves the
396 sign of the previous promotion. */
397 if (!res
398 || TYPE_PRECISION (unprom->type) == orig_precision
399 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
400 unprom->set_op (op, dt, caster);
401 /* Stop if we've already seen a promotion and if this
402 conversion does more than change the sign. */
403 else if (TYPE_PRECISION (op_type)
404 != TYPE_PRECISION (unprom->type))
405 break;
407 /* The sequence now extends to OP. */
408 res = op;
411 /* See whether OP is defined by a cast. Record it as CASTER if
412 the cast is potentially vectorizable. */
413 if (!def_stmt)
414 break;
415 caster = def_stmt_info;
417 /* Ignore pattern statements, since we don't link uses for them. */
418 if (caster
419 && single_use_p
420 && !STMT_VINFO_RELATED_STMT (caster)
421 && !has_single_use (res))
422 *single_use_p = false;
424 gassign *assign = dyn_cast <gassign *> (def_stmt);
425 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
426 break;
428 /* Continue with the input to the cast. */
429 op = gimple_assign_rhs1 (def_stmt);
430 op_type = TREE_TYPE (op);
432 return res;
435 /* OP is an integer operand to an operation that returns TYPE, and we
436 want to treat the operation as a widening one. So far we can treat
437 it as widening from *COMMON_TYPE.
439 Return true if OP is suitable for such a widening operation,
440 either widening from *COMMON_TYPE or from some supertype of it.
441 Update *COMMON_TYPE to the supertype in the latter case.
443 SHIFT_P is true if OP is a shift amount. */
445 static bool
446 vect_joust_widened_integer (tree type, bool shift_p, tree op,
447 tree *common_type)
449 /* Calculate the minimum precision required by OP, without changing
450 the sign of either operand. */
451 unsigned int precision;
452 if (shift_p)
454 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
455 return false;
456 precision = TREE_INT_CST_LOW (op);
458 else
460 precision = wi::min_precision (wi::to_widest (op),
461 TYPE_SIGN (*common_type));
462 if (precision * 2 > TYPE_PRECISION (type))
463 return false;
466 /* If OP requires a wider type, switch to that type. The checks
467 above ensure that this is still narrower than the result. */
468 precision = vect_element_precision (precision);
469 if (TYPE_PRECISION (*common_type) < precision)
470 *common_type = build_nonstandard_integer_type
471 (precision, TYPE_UNSIGNED (*common_type));
472 return true;
475 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
476 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
478 static bool
479 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
481 if (types_compatible_p (*common_type, new_type))
482 return true;
484 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
485 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
486 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
487 return true;
489 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
490 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
491 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
493 *common_type = new_type;
494 return true;
497 /* We have mismatched signs, with the signed type being
498 no wider than the unsigned type. In this case we need
499 a wider signed type. */
500 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
501 TYPE_PRECISION (new_type));
502 precision *= 2;
503 if (precision * 2 > TYPE_PRECISION (type))
504 return false;
506 *common_type = build_nonstandard_integer_type (precision, false);
507 return true;
510 /* Check whether STMT_INFO can be viewed as a tree of integer operations
511 in which each node either performs CODE or WIDENED_CODE, and where
512 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
513 specifies the maximum number of leaf operands. SHIFT_P says whether
514 CODE and WIDENED_CODE are some sort of shift.
516 If STMT_INFO is such a tree, return the number of leaf operands
517 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
518 to a type that (a) is narrower than the result of STMT_INFO and
519 (b) can hold all leaf operand values.
521 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
522 exists. */
524 static unsigned int
525 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
526 tree_code widened_code, bool shift_p,
527 unsigned int max_nops,
528 vect_unpromoted_value *unprom, tree *common_type)
530 /* Check for an integer operation with the right code. */
531 vec_info *vinfo = stmt_info->vinfo;
532 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
533 if (!assign)
534 return 0;
536 tree_code rhs_code = gimple_assign_rhs_code (assign);
537 if (rhs_code != code && rhs_code != widened_code)
538 return 0;
540 tree type = gimple_expr_type (assign);
541 if (!INTEGRAL_TYPE_P (type))
542 return 0;
544 /* Assume that both operands will be leaf operands. */
545 max_nops -= 2;
547 /* Check the operands. */
548 unsigned int next_op = 0;
549 for (unsigned int i = 0; i < 2; ++i)
551 vect_unpromoted_value *this_unprom = &unprom[next_op];
552 unsigned int nops = 1;
553 tree op = gimple_op (assign, i + 1);
554 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
556 /* We already have a common type from earlier operands.
557 Update it to account for OP. */
558 this_unprom->set_op (op, vect_constant_def);
559 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
560 return 0;
562 else
564 /* Only allow shifts by constants. */
565 if (shift_p && i == 1)
566 return 0;
568 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
569 this_unprom))
570 return 0;
572 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
574 /* The operand isn't widened. If STMT_INFO has the code
575 for an unwidened operation, recursively check whether
576 this operand is a node of the tree. */
577 if (rhs_code != code
578 || max_nops == 0
579 || this_unprom->dt != vect_internal_def)
580 return 0;
582 /* Give back the leaf slot allocated above now that we're
583 not treating this as a leaf operand. */
584 max_nops += 1;
586 /* Recursively process the definition of the operand. */
587 stmt_vec_info def_stmt_info
588 = vinfo->lookup_def (this_unprom->op);
589 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
590 shift_p, max_nops, this_unprom,
591 common_type);
592 if (nops == 0)
593 return 0;
595 max_nops -= nops;
597 else
599 /* Make sure that the operand is narrower than the result. */
600 if (TYPE_PRECISION (this_unprom->type) * 2
601 > TYPE_PRECISION (type))
602 return 0;
604 /* Update COMMON_TYPE for the new operand. */
605 if (i == 0)
606 *common_type = this_unprom->type;
607 else if (!vect_joust_widened_type (type, this_unprom->type,
608 common_type))
609 return 0;
612 next_op += nops;
614 return next_op;
617 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
618 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
620 static tree
621 vect_recog_temp_ssa_var (tree type, gimple *stmt)
623 return make_temp_ssa_name (type, stmt, "patt");
626 /* STMT2_INFO describes a type conversion that could be split into STMT1
627 followed by a version of STMT2_INFO that takes NEW_RHS as its first
628 input. Try to do this using pattern statements, returning true on
629 success. */
631 static bool
632 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
633 gimple *stmt1, tree vectype)
635 if (is_pattern_stmt_p (stmt2_info))
637 /* STMT2_INFO is part of a pattern. Get the statement to which
638 the pattern is attached. */
639 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
640 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
642 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "Splitting pattern statement: ");
646 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
649 /* Since STMT2_INFO is a pattern statement, we can change it
650 in-situ without worrying about changing the code for the
651 containing block. */
652 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location, "into: ");
657 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
658 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
662 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
663 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
664 /* STMT2_INFO is the actual pattern statement. Add STMT1
665 to the end of the definition sequence. */
666 gimple_seq_add_stmt_without_update (def_seq, stmt1);
667 else
669 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
670 before it. */
671 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
672 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
674 return true;
676 else
678 /* STMT2_INFO doesn't yet have a pattern. Try to create a
679 two-statement pattern now. */
680 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
681 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
682 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
683 if (!lhs_vectype)
684 return false;
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_NOTE, vect_location,
689 "Splitting statement: ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
693 /* Add STMT1 as a singleton pattern definition sequence. */
694 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
695 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
696 gimple_seq_add_stmt_without_update (def_seq, stmt1);
698 /* Build the second of the two pattern statements. */
699 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
700 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
701 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
703 if (dump_enabled_p ())
705 dump_printf_loc (MSG_NOTE, vect_location,
706 "into pattern statements: ");
707 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
708 dump_printf_loc (MSG_NOTE, vect_location, "and: ");
709 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0);
712 return true;
716 /* Convert UNPROM to TYPE and return the result, adding new statements
717 to STMT_INFO's pattern definition statements if no better way is
718 available. VECTYPE is the vector form of TYPE. */
720 static tree
721 vect_convert_input (stmt_vec_info stmt_info, tree type,
722 vect_unpromoted_value *unprom, tree vectype)
724 /* Check for a no-op conversion. */
725 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
726 return unprom->op;
728 /* Allow the caller to create constant vect_unpromoted_values. */
729 if (TREE_CODE (unprom->op) == INTEGER_CST)
730 return wide_int_to_tree (type, wi::to_widest (unprom->op));
732 /* See if we can reuse an existing result. */
733 if (unprom->caster)
735 tree lhs = gimple_get_lhs (unprom->caster->stmt);
736 if (types_compatible_p (TREE_TYPE (lhs), type))
737 return lhs;
740 /* We need a new conversion statement. */
741 tree new_op = vect_recog_temp_ssa_var (type, NULL);
742 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
744 /* If the operation is the input to a vectorizable cast, try splitting
745 that cast into two, taking the required result as a mid-way point. */
746 if (unprom->caster)
748 tree lhs = gimple_get_lhs (unprom->caster->stmt);
749 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
750 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
751 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
752 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
753 return new_op;
756 /* If OP is an external value, see if we can insert the new statement
757 on an incoming edge. */
758 if (unprom->dt == vect_external_def)
759 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
761 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
762 gcc_assert (!new_bb);
763 return new_op;
766 /* As a (common) last resort, add the statement to the pattern itself. */
767 append_pattern_def_seq (stmt_info, new_stmt, vectype);
768 return new_op;
771 /* Invoke vect_convert_input for N elements of UNPROM and store the
772 result in the corresponding elements of RESULT. */
774 static void
775 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
776 tree *result, tree type, vect_unpromoted_value *unprom,
777 tree vectype)
779 for (unsigned int i = 0; i < n; ++i)
781 unsigned int j;
782 for (j = 0; j < i; ++j)
783 if (unprom[j].op == unprom[i].op)
784 break;
785 if (j < i)
786 result[i] = result[j];
787 else
788 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
792 /* The caller has created a (possibly empty) sequence of pattern definition
793 statements followed by a single statement PATTERN_STMT. Cast the result
794 of this final statement to TYPE. If a new statement is needed, add
795 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
796 and return the new statement, otherwise return PATTERN_STMT as-is.
797 VECITYPE is the vector form of PATTERN_STMT's result type. */
799 static gimple *
800 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
801 tree vecitype)
803 tree lhs = gimple_get_lhs (pattern_stmt);
804 if (!types_compatible_p (type, TREE_TYPE (lhs)))
806 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
807 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
808 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
810 return pattern_stmt;
813 /* Return true if STMT_VINFO describes a reduction for which reassociation
814 is allowed. If STMT_INFO is part of a group, assume that it's part of
815 a reduction chain and optimistically assume that all statements
816 except the last allow reassociation. */
818 static bool
819 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
821 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
822 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
823 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
826 /* As above, but also require it to have code CODE and to be a reduction
827 in the outermost loop. When returning true, store the operands in
828 *OP0_OUT and *OP1_OUT. */
830 static bool
831 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
832 tree *op0_out, tree *op1_out)
834 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
835 if (!loop_info)
836 return false;
838 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
839 if (!assign || gimple_assign_rhs_code (assign) != code)
840 return false;
842 /* We don't allow changing the order of the computation in the inner-loop
843 when doing outer-loop vectorization. */
844 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
845 if (loop && nested_in_vect_loop_p (loop, assign))
846 return false;
848 if (!vect_reassociating_reduction_p (stmt_info))
849 return false;
851 *op0_out = gimple_assign_rhs1 (assign);
852 *op1_out = gimple_assign_rhs2 (assign);
853 return true;
856 /* Function vect_recog_dot_prod_pattern
858 Try to find the following pattern:
860 type x_t, y_t;
861 TYPE1 prod;
862 TYPE2 sum = init;
863 loop:
864 sum_0 = phi <init, sum_1>
865 S1 x_t = ...
866 S2 y_t = ...
867 S3 x_T = (TYPE1) x_t;
868 S4 y_T = (TYPE1) y_t;
869 S5 prod = x_T * y_T;
870 [S6 prod = (TYPE2) prod; #optional]
871 S7 sum_1 = prod + sum_0;
873 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
874 same size of 'TYPE1' or bigger. This is a special case of a reduction
875 computation.
877 Input:
879 * STMT_VINFO: The stmt from which the pattern search begins. In the
880 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
881 will be detected.
883 Output:
885 * TYPE_OUT: The type of the output of this pattern.
887 * Return value: A new stmt that will be used to replace the sequence of
888 stmts that constitute the pattern. In this case it will be:
889 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
891 Note: The dot-prod idiom is a widening reduction pattern that is
892 vectorized without preserving all the intermediate results. It
893 produces only N/2 (widened) results (by summing up pairs of
894 intermediate results) rather than all N results. Therefore, we
895 cannot allow this pattern when we want to get all the results and in
896 the correct order (as is the case when this computation is in an
897 inner-loop nested in an outer-loop that us being vectorized). */
899 static gimple *
900 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
902 tree oprnd0, oprnd1;
903 gimple *last_stmt = stmt_vinfo->stmt;
904 vec_info *vinfo = stmt_vinfo->vinfo;
905 tree type, half_type;
906 gimple *pattern_stmt;
907 tree var;
909 /* Look for the following pattern
910 DX = (TYPE1) X;
911 DY = (TYPE1) Y;
912 DPROD = DX * DY;
913 DDPROD = (TYPE2) DPROD;
914 sum_1 = DDPROD + sum_0;
915 In which
916 - DX is double the size of X
917 - DY is double the size of Y
918 - DX, DY, DPROD all have the same type
919 - sum is the same size of DPROD or bigger
920 - sum has been recognized as a reduction variable.
922 This is equivalent to:
923 DPROD = X w* Y; #widen mult
924 sum_1 = DPROD w+ sum_0; #widen summation
926 DPROD = X w* Y; #widen mult
927 sum_1 = DPROD + sum_0; #summation
930 /* Starting from LAST_STMT, follow the defs of its uses in search
931 of the above pattern. */
933 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
934 &oprnd0, &oprnd1))
935 return NULL;
937 type = gimple_expr_type (last_stmt);
939 vect_unpromoted_value unprom_mult;
940 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
942 /* So far so good. Since last_stmt was detected as a (summation) reduction,
943 we know that oprnd1 is the reduction variable (defined by a loop-header
944 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
945 Left to check that oprnd0 is defined by a (widen_)mult_expr */
946 if (!oprnd0)
947 return NULL;
949 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
950 if (!mult_vinfo)
951 return NULL;
953 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
954 inside the loop (in case we are analyzing an outer-loop). */
955 vect_unpromoted_value unprom0[2];
956 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
957 false, 2, unprom0, &half_type))
958 return NULL;
960 /* If there are two widening operations, make sure they agree on
961 the sign of the extension. */
962 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
963 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
964 return NULL;
966 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
968 tree half_vectype;
969 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
970 type_out, &half_vectype))
971 return NULL;
973 /* Get the inputs in the appropriate types. */
974 tree mult_oprnd[2];
975 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
976 unprom0, half_vectype);
978 var = vect_recog_temp_ssa_var (type, NULL);
979 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
980 mult_oprnd[0], mult_oprnd[1], oprnd1);
982 return pattern_stmt;
986 /* Function vect_recog_sad_pattern
988 Try to find the following Sum of Absolute Difference (SAD) pattern:
990 type x_t, y_t;
991 signed TYPE1 diff, abs_diff;
992 TYPE2 sum = init;
993 loop:
994 sum_0 = phi <init, sum_1>
995 S1 x_t = ...
996 S2 y_t = ...
997 S3 x_T = (TYPE1) x_t;
998 S4 y_T = (TYPE1) y_t;
999 S5 diff = x_T - y_T;
1000 S6 abs_diff = ABS_EXPR <diff>;
1001 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1002 S8 sum_1 = abs_diff + sum_0;
1004 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1005 same size of 'TYPE1' or bigger. This is a special case of a reduction
1006 computation.
1008 Input:
1010 * STMT_VINFO: The stmt from which the pattern search begins. In the
1011 example, when this function is called with S8, the pattern
1012 {S3,S4,S5,S6,S7,S8} will be detected.
1014 Output:
1016 * TYPE_OUT: The type of the output of this pattern.
1018 * Return value: A new stmt that will be used to replace the sequence of
1019 stmts that constitute the pattern. In this case it will be:
1020 SAD_EXPR <x_t, y_t, sum_0>
1023 static gimple *
1024 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1026 gimple *last_stmt = stmt_vinfo->stmt;
1027 vec_info *vinfo = stmt_vinfo->vinfo;
1028 tree half_type;
1030 /* Look for the following pattern
1031 DX = (TYPE1) X;
1032 DY = (TYPE1) Y;
1033 DDIFF = DX - DY;
1034 DAD = ABS_EXPR <DDIFF>;
1035 DDPROD = (TYPE2) DPROD;
1036 sum_1 = DAD + sum_0;
1037 In which
1038 - DX is at least double the size of X
1039 - DY is at least double the size of Y
1040 - DX, DY, DDIFF, DAD all have the same type
1041 - sum is the same size of DAD or bigger
1042 - sum has been recognized as a reduction variable.
1044 This is equivalent to:
1045 DDIFF = X w- Y; #widen sub
1046 DAD = ABS_EXPR <DDIFF>;
1047 sum_1 = DAD w+ sum_0; #widen summation
1049 DDIFF = X w- Y; #widen sub
1050 DAD = ABS_EXPR <DDIFF>;
1051 sum_1 = DAD + sum_0; #summation
1054 /* Starting from LAST_STMT, follow the defs of its uses in search
1055 of the above pattern. */
1057 tree plus_oprnd0, plus_oprnd1;
1058 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1059 &plus_oprnd0, &plus_oprnd1))
1060 return NULL;
1062 tree sum_type = gimple_expr_type (last_stmt);
1064 /* Any non-truncating sequence of conversions is OK here, since
1065 with a successful match, the result of the ABS(U) is known to fit
1066 within the nonnegative range of the result type. (It cannot be the
1067 negative of the minimum signed value due to the range of the widening
1068 MINUS_EXPR.) */
1069 vect_unpromoted_value unprom_abs;
1070 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1071 &unprom_abs);
1073 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1074 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1075 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1076 Then check that plus_oprnd0 is defined by an abs_expr. */
1078 if (!plus_oprnd0)
1079 return NULL;
1081 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1082 if (!abs_stmt_vinfo)
1083 return NULL;
1085 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1086 inside the loop (in case we are analyzing an outer-loop). */
1087 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1088 if (!abs_stmt
1089 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1090 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1091 return NULL;
1093 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1094 tree abs_type = TREE_TYPE (abs_oprnd);
1095 if (TYPE_UNSIGNED (abs_type))
1096 return NULL;
1098 /* Peel off conversions from the ABS input. This can involve sign
1099 changes (e.g. from an unsigned subtraction to a signed ABS input)
1100 or signed promotion, but it can't include unsigned promotion.
1101 (Note that ABS of an unsigned promotion should have been folded
1102 away before now anyway.) */
1103 vect_unpromoted_value unprom_diff;
1104 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1105 &unprom_diff);
1106 if (!abs_oprnd)
1107 return NULL;
1108 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1109 && TYPE_UNSIGNED (unprom_diff.type))
1110 return NULL;
1112 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1113 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1114 if (!diff_stmt_vinfo)
1115 return NULL;
1117 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1118 inside the loop (in case we are analyzing an outer-loop). */
1119 vect_unpromoted_value unprom[2];
1120 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1121 false, 2, unprom, &half_type))
1122 return NULL;
1124 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1126 tree half_vectype;
1127 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1128 type_out, &half_vectype))
1129 return NULL;
1131 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1132 tree sad_oprnd[2];
1133 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1134 unprom, half_vectype);
1136 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1137 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1138 sad_oprnd[1], plus_oprnd1);
1140 return pattern_stmt;
1143 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1144 so that it can be treated as though it had the form:
1146 A_TYPE a;
1147 B_TYPE b;
1148 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1149 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1150 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1151 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1152 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1154 Try to replace the pattern with:
1156 A_TYPE a;
1157 B_TYPE b;
1158 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1159 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1160 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1161 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1163 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1165 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1166 name of the pattern being matched, for dump purposes. */
1168 static gimple *
1169 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1170 tree_code orig_code, tree_code wide_code,
1171 bool shift_p, const char *name)
1173 gimple *last_stmt = last_stmt_info->stmt;
1175 vect_unpromoted_value unprom[2];
1176 tree half_type;
1177 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1178 shift_p, 2, unprom, &half_type))
1179 return NULL;
1181 /* Pattern detected. */
1182 vect_pattern_detected (name, last_stmt);
1184 tree type = gimple_expr_type (last_stmt);
1185 tree itype = type;
1186 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1187 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1188 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1189 TYPE_UNSIGNED (half_type));
1191 /* Check target support */
1192 tree vectype = get_vectype_for_scalar_type (half_type);
1193 tree vecitype = get_vectype_for_scalar_type (itype);
1194 enum tree_code dummy_code;
1195 int dummy_int;
1196 auto_vec<tree> dummy_vec;
1197 if (!vectype
1198 || !vecitype
1199 || !supportable_widening_operation (wide_code, last_stmt,
1200 vecitype, vectype,
1201 &dummy_code, &dummy_code,
1202 &dummy_int, &dummy_vec))
1203 return NULL;
1205 *type_out = get_vectype_for_scalar_type (type);
1206 if (!*type_out)
1207 return NULL;
1209 tree oprnd[2];
1210 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1212 tree var = vect_recog_temp_ssa_var (itype, NULL);
1213 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1214 oprnd[0], oprnd[1]);
1216 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1219 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1220 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1222 static gimple *
1223 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1225 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1226 WIDEN_MULT_EXPR, false,
1227 "vect_recog_widen_mult_pattern");
1230 /* Function vect_recog_pow_pattern
1232 Try to find the following pattern:
1234 x = POW (y, N);
1236 with POW being one of pow, powf, powi, powif and N being
1237 either 2 or 0.5.
1239 Input:
1241 * STMT_VINFO: The stmt from which the pattern search begins.
1243 Output:
1245 * TYPE_OUT: The type of the output of this pattern.
1247 * Return value: A new stmt that will be used to replace the sequence of
1248 stmts that constitute the pattern. In this case it will be:
1249 x = x * x
1251 x = sqrt (x)
1254 static gimple *
1255 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1257 gimple *last_stmt = stmt_vinfo->stmt;
1258 tree base, exp;
1259 gimple *stmt;
1260 tree var;
1262 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1263 return NULL;
1265 switch (gimple_call_combined_fn (last_stmt))
1267 CASE_CFN_POW:
1268 CASE_CFN_POWI:
1269 break;
1271 default:
1272 return NULL;
1275 base = gimple_call_arg (last_stmt, 0);
1276 exp = gimple_call_arg (last_stmt, 1);
1277 if (TREE_CODE (exp) != REAL_CST
1278 && TREE_CODE (exp) != INTEGER_CST)
1280 if (flag_unsafe_math_optimizations
1281 && TREE_CODE (base) == REAL_CST
1282 && !gimple_call_internal_p (last_stmt))
1284 combined_fn log_cfn;
1285 built_in_function exp_bfn;
1286 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1288 case BUILT_IN_POW:
1289 log_cfn = CFN_BUILT_IN_LOG;
1290 exp_bfn = BUILT_IN_EXP;
1291 break;
1292 case BUILT_IN_POWF:
1293 log_cfn = CFN_BUILT_IN_LOGF;
1294 exp_bfn = BUILT_IN_EXPF;
1295 break;
1296 case BUILT_IN_POWL:
1297 log_cfn = CFN_BUILT_IN_LOGL;
1298 exp_bfn = BUILT_IN_EXPL;
1299 break;
1300 default:
1301 return NULL;
1303 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1304 tree exp_decl = builtin_decl_implicit (exp_bfn);
1305 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1306 does that, but if C is a power of 2, we want to use
1307 exp2 (log2 (C) * x) in the non-vectorized version, but for
1308 vectorization we don't have vectorized exp2. */
1309 if (logc
1310 && TREE_CODE (logc) == REAL_CST
1311 && exp_decl
1312 && lookup_attribute ("omp declare simd",
1313 DECL_ATTRIBUTES (exp_decl)))
1315 cgraph_node *node = cgraph_node::get_create (exp_decl);
1316 if (node->simd_clones == NULL)
1318 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1319 || node->definition)
1320 return NULL;
1321 expand_simd_clones (node);
1322 if (node->simd_clones == NULL)
1323 return NULL;
1325 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1326 if (!*type_out)
1327 return NULL;
1328 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1329 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1330 append_pattern_def_seq (stmt_vinfo, g);
1331 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1332 g = gimple_build_call (exp_decl, 1, def);
1333 gimple_call_set_lhs (g, res);
1334 return g;
1338 return NULL;
1341 /* We now have a pow or powi builtin function call with a constant
1342 exponent. */
1344 /* Catch squaring. */
1345 if ((tree_fits_shwi_p (exp)
1346 && tree_to_shwi (exp) == 2)
1347 || (TREE_CODE (exp) == REAL_CST
1348 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1350 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1351 TREE_TYPE (base), type_out))
1352 return NULL;
1354 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1355 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1356 return stmt;
1359 /* Catch square root. */
1360 if (TREE_CODE (exp) == REAL_CST
1361 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1363 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1364 if (*type_out
1365 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1366 OPTIMIZE_FOR_SPEED))
1368 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1369 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1370 gimple_call_set_lhs (stmt, var);
1371 gimple_call_set_nothrow (stmt, true);
1372 return stmt;
1376 return NULL;
1380 /* Function vect_recog_widen_sum_pattern
1382 Try to find the following pattern:
1384 type x_t;
1385 TYPE x_T, sum = init;
1386 loop:
1387 sum_0 = phi <init, sum_1>
1388 S1 x_t = *p;
1389 S2 x_T = (TYPE) x_t;
1390 S3 sum_1 = x_T + sum_0;
1392 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1393 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1394 a special case of a reduction computation.
1396 Input:
1398 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1399 when this function is called with S3, the pattern {S2,S3} will be detected.
1401 Output:
1403 * TYPE_OUT: The type of the output of this pattern.
1405 * Return value: A new stmt that will be used to replace the sequence of
1406 stmts that constitute the pattern. In this case it will be:
1407 WIDEN_SUM <x_t, sum_0>
1409 Note: The widening-sum idiom is a widening reduction pattern that is
1410 vectorized without preserving all the intermediate results. It
1411 produces only N/2 (widened) results (by summing up pairs of
1412 intermediate results) rather than all N results. Therefore, we
1413 cannot allow this pattern when we want to get all the results and in
1414 the correct order (as is the case when this computation is in an
1415 inner-loop nested in an outer-loop that us being vectorized). */
1417 static gimple *
1418 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1420 gimple *last_stmt = stmt_vinfo->stmt;
1421 tree oprnd0, oprnd1;
1422 vec_info *vinfo = stmt_vinfo->vinfo;
1423 tree type;
1424 gimple *pattern_stmt;
1425 tree var;
1427 /* Look for the following pattern
1428 DX = (TYPE) X;
1429 sum_1 = DX + sum_0;
1430 In which DX is at least double the size of X, and sum_1 has been
1431 recognized as a reduction variable.
1434 /* Starting from LAST_STMT, follow the defs of its uses in search
1435 of the above pattern. */
1437 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1438 &oprnd0, &oprnd1))
1439 return NULL;
1441 type = gimple_expr_type (last_stmt);
1443 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1444 we know that oprnd1 is the reduction variable (defined by a loop-header
1445 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1446 Left to check that oprnd0 is defined by a cast from type 'type' to type
1447 'TYPE'. */
1449 vect_unpromoted_value unprom0;
1450 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1451 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1452 return NULL;
1454 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1456 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1457 type_out))
1458 return NULL;
1460 var = vect_recog_temp_ssa_var (type, NULL);
1461 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1463 return pattern_stmt;
1466 /* Recognize cases in which an operation is performed in one type WTYPE
1467 but could be done more efficiently in a narrower type NTYPE. For example,
1468 if we have:
1470 ATYPE a; // narrower than NTYPE
1471 BTYPE b; // narrower than NTYPE
1472 WTYPE aw = (WTYPE) a;
1473 WTYPE bw = (WTYPE) b;
1474 WTYPE res = aw + bw; // only uses of aw and bw
1476 then it would be more efficient to do:
1478 NTYPE an = (NTYPE) a;
1479 NTYPE bn = (NTYPE) b;
1480 NTYPE resn = an + bn;
1481 WTYPE res = (WTYPE) resn;
1483 Other situations include things like:
1485 ATYPE a; // NTYPE or narrower
1486 WTYPE aw = (WTYPE) a;
1487 WTYPE res = aw + b;
1489 when only "(NTYPE) res" is significant. In that case it's more efficient
1490 to truncate "b" and do the operation on NTYPE instead:
1492 NTYPE an = (NTYPE) a;
1493 NTYPE bn = (NTYPE) b; // truncation
1494 NTYPE resn = an + bn;
1495 WTYPE res = (WTYPE) resn;
1497 All users of "res" should then use "resn" instead, making the final
1498 statement dead (not marked as relevant). The final statement is still
1499 needed to maintain the type correctness of the IR.
1501 vect_determine_precisions has already determined the minimum
1502 precison of the operation and the minimum precision required
1503 by users of the result. */
1505 static gimple *
1506 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1508 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1509 if (!last_stmt)
1510 return NULL;
1512 /* See whether we have found that this operation can be done on a
1513 narrower type without changing its semantics. */
1514 unsigned int new_precision = last_stmt_info->operation_precision;
1515 if (!new_precision)
1516 return NULL;
1518 vec_info *vinfo = last_stmt_info->vinfo;
1519 tree lhs = gimple_assign_lhs (last_stmt);
1520 tree type = TREE_TYPE (lhs);
1521 tree_code code = gimple_assign_rhs_code (last_stmt);
1523 /* Keep the first operand of a COND_EXPR as-is: only the other two
1524 operands are interesting. */
1525 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1527 /* Check the operands. */
1528 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1529 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1530 unprom.quick_grow (nops);
1531 unsigned int min_precision = 0;
1532 bool single_use_p = false;
1533 for (unsigned int i = 0; i < nops; ++i)
1535 tree op = gimple_op (last_stmt, first_op + i);
1536 if (TREE_CODE (op) == INTEGER_CST)
1537 unprom[i].set_op (op, vect_constant_def);
1538 else if (TREE_CODE (op) == SSA_NAME)
1540 bool op_single_use_p = true;
1541 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1542 &op_single_use_p))
1543 return NULL;
1544 /* If:
1546 (1) N bits of the result are needed;
1547 (2) all inputs are widened from M<N bits; and
1548 (3) one operand OP is a single-use SSA name
1550 we can shift the M->N widening from OP to the output
1551 without changing the number or type of extensions involved.
1552 This then reduces the number of copies of STMT_INFO.
1554 If instead of (3) more than one operand is a single-use SSA name,
1555 shifting the extension to the output is even more of a win.
1557 If instead:
1559 (1) N bits of the result are needed;
1560 (2) one operand OP2 is widened from M2<N bits;
1561 (3) another operand OP1 is widened from M1<M2 bits; and
1562 (4) both OP1 and OP2 are single-use
1564 the choice is between:
1566 (a) truncating OP2 to M1, doing the operation on M1,
1567 and then widening the result to N
1569 (b) widening OP1 to M2, doing the operation on M2, and then
1570 widening the result to N
1572 Both shift the M2->N widening of the inputs to the output.
1573 (a) additionally shifts the M1->M2 widening to the output;
1574 it requires fewer copies of STMT_INFO but requires an extra
1575 M2->M1 truncation.
1577 Which is better will depend on the complexity and cost of
1578 STMT_INFO, which is hard to predict at this stage. However,
1579 a clear tie-breaker in favor of (b) is the fact that the
1580 truncation in (a) increases the length of the operation chain.
1582 If instead of (4) only one of OP1 or OP2 is single-use,
1583 (b) is still a win over doing the operation in N bits:
1584 it still shifts the M2->N widening on the single-use operand
1585 to the output and reduces the number of STMT_INFO copies.
1587 If neither operand is single-use then operating on fewer than
1588 N bits might lead to more extensions overall. Whether it does
1589 or not depends on global information about the vectorization
1590 region, and whether that's a good trade-off would again
1591 depend on the complexity and cost of the statements involved,
1592 as well as things like register pressure that are not normally
1593 modelled at this stage. We therefore ignore these cases
1594 and just optimize the clear single-use wins above.
1596 Thus we take the maximum precision of the unpromoted operands
1597 and record whether any operand is single-use. */
1598 if (unprom[i].dt == vect_internal_def)
1600 min_precision = MAX (min_precision,
1601 TYPE_PRECISION (unprom[i].type));
1602 single_use_p |= op_single_use_p;
1607 /* Although the operation could be done in operation_precision, we have
1608 to balance that against introducing extra truncations or extensions.
1609 Calculate the minimum precision that can be handled efficiently.
1611 The loop above determined that the operation could be handled
1612 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1613 extension from the inputs to the output without introducing more
1614 instructions, and would reduce the number of instructions required
1615 for STMT_INFO itself.
1617 vect_determine_precisions has also determined that the result only
1618 needs min_output_precision bits. Truncating by a factor of N times
1619 requires a tree of N - 1 instructions, so if TYPE is N times wider
1620 than min_output_precision, doing the operation in TYPE and truncating
1621 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1622 In contrast:
1624 - truncating the input to a unary operation and doing the operation
1625 in the new type requires at most N - 1 + 1 = N instructions per
1626 output vector
1628 - doing the same for a binary operation requires at most
1629 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1631 Both unary and binary operations require fewer instructions than
1632 this if the operands were extended from a suitable truncated form.
1633 Thus there is usually nothing to lose by doing operations in
1634 min_output_precision bits, but there can be something to gain. */
1635 if (!single_use_p)
1636 min_precision = last_stmt_info->min_output_precision;
1637 else
1638 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1640 /* Apply the minimum efficient precision we just calculated. */
1641 if (new_precision < min_precision)
1642 new_precision = min_precision;
1643 if (new_precision >= TYPE_PRECISION (type))
1644 return NULL;
1646 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1648 *type_out = get_vectype_for_scalar_type (type);
1649 if (!*type_out)
1650 return NULL;
1652 /* We've found a viable pattern. Get the new type of the operation. */
1653 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1654 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1656 /* We specifically don't check here whether the target supports the
1657 new operation, since it might be something that a later pattern
1658 wants to rewrite anyway. If targets have a minimum element size
1659 for some optabs, we should pattern-match smaller ops to larger ops
1660 where beneficial. */
1661 tree new_vectype = get_vectype_for_scalar_type (new_type);
1662 if (!new_vectype)
1663 return NULL;
1665 if (dump_enabled_p ())
1667 dump_printf_loc (MSG_NOTE, vect_location, "demoting ");
1668 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
1669 dump_printf (MSG_NOTE, " to ");
1670 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_type);
1671 dump_printf (MSG_NOTE, "\n");
1674 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1675 tree ops[3] = {};
1676 for (unsigned int i = 1; i < first_op; ++i)
1677 ops[i - 1] = gimple_op (last_stmt, i);
1678 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1679 new_type, &unprom[0], new_vectype);
1681 /* Use the operation to produce a result of type NEW_TYPE. */
1682 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1683 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1684 ops[0], ops[1], ops[2]);
1685 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1687 if (dump_enabled_p ())
1689 dump_printf_loc (MSG_NOTE, vect_location,
1690 "created pattern stmt: ");
1691 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1694 pattern_stmt = vect_convert_output (last_stmt_info, type,
1695 pattern_stmt, new_vectype);
1697 return pattern_stmt;
1700 /* Recognize the patterns:
1702 ATYPE a; // narrower than TYPE
1703 BTYPE b; // narrower than TYPE
1704 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1705 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1707 where only the bottom half of avg is used. Try to transform them into:
1709 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1710 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1712 followed by:
1714 TYPE avg = (TYPE) avg';
1716 where NTYPE is no wider than half of TYPE. Since only the bottom half
1717 of avg is used, all or part of the cast of avg' should become redundant. */
1719 static gimple *
1720 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1722 /* Check for a shift right by one bit. */
1723 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1724 vec_info *vinfo = last_stmt_info->vinfo;
1725 if (!last_stmt
1726 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1727 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1728 return NULL;
1730 /* Check that the shift result is wider than the users of the
1731 result need (i.e. that narrowing would be a natural choice). */
1732 tree lhs = gimple_assign_lhs (last_stmt);
1733 tree type = TREE_TYPE (lhs);
1734 unsigned int target_precision
1735 = vect_element_precision (last_stmt_info->min_output_precision);
1736 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1737 return NULL;
1739 /* Get the definition of the shift input. */
1740 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1741 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1742 if (!plus_stmt_info)
1743 return NULL;
1745 /* Check whether the shift input can be seen as a tree of additions on
1746 2 or 3 widened inputs.
1748 Note that the pattern should be a win even if the result of one or
1749 more additions is reused elsewhere: if the pattern matches, we'd be
1750 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1751 internal_fn ifn = IFN_AVG_FLOOR;
1752 vect_unpromoted_value unprom[3];
1753 tree new_type;
1754 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1755 PLUS_EXPR, false, 3,
1756 unprom, &new_type);
1757 if (nops == 0)
1758 return NULL;
1759 if (nops == 3)
1761 /* Check that one operand is 1. */
1762 unsigned int i;
1763 for (i = 0; i < 3; ++i)
1764 if (integer_onep (unprom[i].op))
1765 break;
1766 if (i == 3)
1767 return NULL;
1768 /* Throw away the 1 operand and keep the other two. */
1769 if (i < 2)
1770 unprom[i] = unprom[2];
1771 ifn = IFN_AVG_CEIL;
1774 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1776 /* We know that:
1778 (a) the operation can be viewed as:
1780 TYPE widened0 = (TYPE) UNPROM[0];
1781 TYPE widened1 = (TYPE) UNPROM[1];
1782 TYPE tmp1 = widened0 + widened1 {+ 1};
1783 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1785 (b) the first two statements are equivalent to:
1787 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1788 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1790 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1791 where sensible;
1793 (d) all the operations can be performed correctly at twice the width of
1794 NEW_TYPE, due to the nature of the average operation; and
1796 (e) users of the result of the right shift need only TARGET_PRECISION
1797 bits, where TARGET_PRECISION is no more than half of TYPE's
1798 precision.
1800 Under these circumstances, the only situation in which NEW_TYPE
1801 could be narrower than TARGET_PRECISION is if widened0, widened1
1802 and an addition result are all used more than once. Thus we can
1803 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1804 as "free", whereas widening the result of the average instruction
1805 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1806 therefore better not to go narrower than TARGET_PRECISION. */
1807 if (TYPE_PRECISION (new_type) < target_precision)
1808 new_type = build_nonstandard_integer_type (target_precision,
1809 TYPE_UNSIGNED (new_type));
1811 /* Check for target support. */
1812 tree new_vectype = get_vectype_for_scalar_type (new_type);
1813 if (!new_vectype
1814 || !direct_internal_fn_supported_p (ifn, new_vectype,
1815 OPTIMIZE_FOR_SPEED))
1816 return NULL;
1818 /* The IR requires a valid vector type for the cast result, even though
1819 it's likely to be discarded. */
1820 *type_out = get_vectype_for_scalar_type (type);
1821 if (!*type_out)
1822 return NULL;
1824 /* Generate the IFN_AVG* call. */
1825 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1826 tree new_ops[2];
1827 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1828 unprom, new_vectype);
1829 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1830 new_ops[1]);
1831 gimple_call_set_lhs (average_stmt, new_var);
1832 gimple_set_location (average_stmt, gimple_location (last_stmt));
1834 if (dump_enabled_p ())
1836 dump_printf_loc (MSG_NOTE, vect_location,
1837 "created pattern stmt: ");
1838 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
1841 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1844 /* Recognize cases in which the input to a cast is wider than its
1845 output, and the input is fed by a widening operation. Fold this
1846 by removing the unnecessary intermediate widening. E.g.:
1848 unsigned char a;
1849 unsigned int b = (unsigned int) a;
1850 unsigned short c = (unsigned short) b;
1854 unsigned short c = (unsigned short) a;
1856 Although this is rare in input IR, it is an expected side-effect
1857 of the over-widening pattern above.
1859 This is beneficial also for integer-to-float conversions, if the
1860 widened integer has more bits than the float, and if the unwidened
1861 input doesn't. */
1863 static gimple *
1864 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1866 /* Check for a cast, including an integer-to-float conversion. */
1867 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1868 if (!last_stmt)
1869 return NULL;
1870 tree_code code = gimple_assign_rhs_code (last_stmt);
1871 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1872 return NULL;
1874 /* Make sure that the rhs is a scalar with a natural bitsize. */
1875 tree lhs = gimple_assign_lhs (last_stmt);
1876 if (!lhs)
1877 return NULL;
1878 tree lhs_type = TREE_TYPE (lhs);
1879 scalar_mode lhs_mode;
1880 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1881 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1882 return NULL;
1884 /* Check for a narrowing operation (from a vector point of view). */
1885 tree rhs = gimple_assign_rhs1 (last_stmt);
1886 tree rhs_type = TREE_TYPE (rhs);
1887 if (!INTEGRAL_TYPE_P (rhs_type)
1888 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1889 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1890 return NULL;
1892 /* Try to find an unpromoted input. */
1893 vec_info *vinfo = last_stmt_info->vinfo;
1894 vect_unpromoted_value unprom;
1895 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1896 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1897 return NULL;
1899 /* If the bits above RHS_TYPE matter, make sure that they're the
1900 same when extending from UNPROM as they are when extending from RHS. */
1901 if (!INTEGRAL_TYPE_P (lhs_type)
1902 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1903 return NULL;
1905 /* We can get the same result by casting UNPROM directly, to avoid
1906 the unnecessary widening and narrowing. */
1907 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1909 *type_out = get_vectype_for_scalar_type (lhs_type);
1910 if (!*type_out)
1911 return NULL;
1913 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1914 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1915 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1917 return pattern_stmt;
1920 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1921 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1923 static gimple *
1924 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1926 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1927 WIDEN_LSHIFT_EXPR, true,
1928 "vect_recog_widen_shift_pattern");
1931 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1933 type a_t, b_t, c_t;
1935 S0 a_t = b_t r<< c_t;
1937 Input/Output:
1939 * STMT_VINFO: The stmt from which the pattern search begins,
1940 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1941 with a sequence:
1943 S1 d_t = -c_t;
1944 S2 e_t = d_t & (B - 1);
1945 S3 f_t = b_t << c_t;
1946 S4 g_t = b_t >> e_t;
1947 S0 a_t = f_t | g_t;
1949 where B is element bitsize of type.
1951 Output:
1953 * TYPE_OUT: The type of the output of this pattern.
1955 * Return value: A new stmt that will be used to replace the rotate
1956 S0 stmt. */
1958 static gimple *
1959 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1961 gimple *last_stmt = stmt_vinfo->stmt;
1962 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1963 gimple *pattern_stmt, *def_stmt;
1964 enum tree_code rhs_code;
1965 vec_info *vinfo = stmt_vinfo->vinfo;
1966 enum vect_def_type dt;
1967 optab optab1, optab2;
1968 edge ext_def = NULL;
1970 if (!is_gimple_assign (last_stmt))
1971 return NULL;
1973 rhs_code = gimple_assign_rhs_code (last_stmt);
1974 switch (rhs_code)
1976 case LROTATE_EXPR:
1977 case RROTATE_EXPR:
1978 break;
1979 default:
1980 return NULL;
1983 lhs = gimple_assign_lhs (last_stmt);
1984 oprnd0 = gimple_assign_rhs1 (last_stmt);
1985 type = TREE_TYPE (oprnd0);
1986 oprnd1 = gimple_assign_rhs2 (last_stmt);
1987 if (TREE_CODE (oprnd0) != SSA_NAME
1988 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1989 || !INTEGRAL_TYPE_P (type)
1990 || !TYPE_UNSIGNED (type))
1991 return NULL;
1993 stmt_vec_info def_stmt_info;
1994 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
1995 return NULL;
1997 if (dt != vect_internal_def
1998 && dt != vect_constant_def
1999 && dt != vect_external_def)
2000 return NULL;
2002 vectype = get_vectype_for_scalar_type (type);
2003 if (vectype == NULL_TREE)
2004 return NULL;
2006 /* If vector/vector or vector/scalar rotate is supported by the target,
2007 don't do anything here. */
2008 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2009 if (optab1
2010 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2011 return NULL;
2013 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2015 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2016 if (optab2
2017 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2018 return NULL;
2021 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2022 don't do anything here either. */
2023 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2024 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2025 if (!optab1
2026 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2027 || !optab2
2028 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2030 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2031 return NULL;
2032 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2033 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2034 if (!optab1
2035 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2036 || !optab2
2037 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2038 return NULL;
2041 *type_out = vectype;
2043 if (dt == vect_external_def
2044 && TREE_CODE (oprnd1) == SSA_NAME)
2045 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2047 def = NULL_TREE;
2048 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2049 if (TREE_CODE (oprnd1) == INTEGER_CST
2050 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2051 def = oprnd1;
2052 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2054 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2055 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2056 && TYPE_PRECISION (TREE_TYPE (rhs1))
2057 == TYPE_PRECISION (type))
2058 def = rhs1;
2061 if (def == NULL_TREE)
2063 def = vect_recog_temp_ssa_var (type, NULL);
2064 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2065 if (ext_def)
2067 basic_block new_bb
2068 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2069 gcc_assert (!new_bb);
2071 else
2072 append_pattern_def_seq (stmt_vinfo, def_stmt);
2074 stype = TREE_TYPE (def);
2075 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2077 if (TREE_CODE (def) == INTEGER_CST)
2079 if (!tree_fits_uhwi_p (def)
2080 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2081 || integer_zerop (def))
2082 return NULL;
2083 def2 = build_int_cst (stype,
2084 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2086 else
2088 tree vecstype = get_vectype_for_scalar_type (stype);
2090 if (vecstype == NULL_TREE)
2091 return NULL;
2092 def2 = vect_recog_temp_ssa_var (stype, NULL);
2093 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2094 if (ext_def)
2096 basic_block new_bb
2097 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2098 gcc_assert (!new_bb);
2100 else
2101 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2103 def2 = vect_recog_temp_ssa_var (stype, NULL);
2104 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2105 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2106 gimple_assign_lhs (def_stmt), mask);
2107 if (ext_def)
2109 basic_block new_bb
2110 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2111 gcc_assert (!new_bb);
2113 else
2114 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2117 var1 = vect_recog_temp_ssa_var (type, NULL);
2118 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2119 ? LSHIFT_EXPR : RSHIFT_EXPR,
2120 oprnd0, def);
2121 append_pattern_def_seq (stmt_vinfo, def_stmt);
2123 var2 = vect_recog_temp_ssa_var (type, NULL);
2124 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2125 ? RSHIFT_EXPR : LSHIFT_EXPR,
2126 oprnd0, def2);
2127 append_pattern_def_seq (stmt_vinfo, def_stmt);
2129 /* Pattern detected. */
2130 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2132 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2133 var = vect_recog_temp_ssa_var (type, NULL);
2134 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2136 return pattern_stmt;
2139 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2140 vectorized:
2142 type a_t;
2143 TYPE b_T, res_T;
2145 S1 a_t = ;
2146 S2 b_T = ;
2147 S3 res_T = b_T op a_t;
2149 where type 'TYPE' is a type with different size than 'type',
2150 and op is <<, >> or rotate.
2152 Also detect cases:
2154 type a_t;
2155 TYPE b_T, c_T, res_T;
2157 S0 c_T = ;
2158 S1 a_t = (type) c_T;
2159 S2 b_T = ;
2160 S3 res_T = b_T op a_t;
2162 Input/Output:
2164 * STMT_VINFO: The stmt from which the pattern search begins,
2165 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2166 with a shift/rotate which has same type on both operands, in the
2167 second case just b_T op c_T, in the first case with added cast
2168 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2170 Output:
2172 * TYPE_OUT: The type of the output of this pattern.
2174 * Return value: A new stmt that will be used to replace the shift/rotate
2175 S3 stmt. */
2177 static gimple *
2178 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2179 tree *type_out)
2181 gimple *last_stmt = stmt_vinfo->stmt;
2182 tree oprnd0, oprnd1, lhs, var;
2183 gimple *pattern_stmt;
2184 enum tree_code rhs_code;
2185 vec_info *vinfo = stmt_vinfo->vinfo;
2187 if (!is_gimple_assign (last_stmt))
2188 return NULL;
2190 rhs_code = gimple_assign_rhs_code (last_stmt);
2191 switch (rhs_code)
2193 case LSHIFT_EXPR:
2194 case RSHIFT_EXPR:
2195 case LROTATE_EXPR:
2196 case RROTATE_EXPR:
2197 break;
2198 default:
2199 return NULL;
2202 lhs = gimple_assign_lhs (last_stmt);
2203 oprnd0 = gimple_assign_rhs1 (last_stmt);
2204 oprnd1 = gimple_assign_rhs2 (last_stmt);
2205 if (TREE_CODE (oprnd0) != SSA_NAME
2206 || TREE_CODE (oprnd1) != SSA_NAME
2207 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2208 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2209 || TYPE_PRECISION (TREE_TYPE (lhs))
2210 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2211 return NULL;
2213 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2214 if (!def_vinfo)
2215 return NULL;
2217 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2218 if (*type_out == NULL_TREE)
2219 return NULL;
2221 tree def = NULL_TREE;
2222 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2223 if (def_stmt && gimple_assign_cast_p (def_stmt))
2225 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2226 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2227 && TYPE_PRECISION (TREE_TYPE (rhs1))
2228 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2230 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2231 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2232 def = rhs1;
2233 else
2235 tree mask
2236 = build_low_bits_mask (TREE_TYPE (rhs1),
2237 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2238 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2239 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2240 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2241 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2246 if (def == NULL_TREE)
2248 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2249 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2250 append_pattern_def_seq (stmt_vinfo, def_stmt);
2253 /* Pattern detected. */
2254 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2256 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2257 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2258 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2260 return pattern_stmt;
2263 /* Return true iff the target has a vector optab implementing the operation
2264 CODE on type VECTYPE. */
2266 static bool
2267 target_has_vecop_for_code (tree_code code, tree vectype)
2269 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2270 return voptab
2271 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2274 /* Verify that the target has optabs of VECTYPE to perform all the steps
2275 needed by the multiplication-by-immediate synthesis algorithm described by
2276 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2277 present. Return true iff the target supports all the steps. */
2279 static bool
2280 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2281 tree vectype, bool synth_shift_p)
2283 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2284 return false;
2286 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2287 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2289 if (var == negate_variant
2290 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2291 return false;
2293 /* If we must synthesize shifts with additions make sure that vector
2294 addition is available. */
2295 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2296 return false;
2298 for (int i = 1; i < alg->ops; i++)
2300 switch (alg->op[i])
2302 case alg_shift:
2303 break;
2304 case alg_add_t_m2:
2305 case alg_add_t2_m:
2306 case alg_add_factor:
2307 if (!supports_vplus)
2308 return false;
2309 break;
2310 case alg_sub_t_m2:
2311 case alg_sub_t2_m:
2312 case alg_sub_factor:
2313 if (!supports_vminus)
2314 return false;
2315 break;
2316 case alg_unknown:
2317 case alg_m:
2318 case alg_zero:
2319 case alg_impossible:
2320 return false;
2321 default:
2322 gcc_unreachable ();
2326 return true;
2329 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2330 putting the final result in DEST. Append all statements but the last into
2331 VINFO. Return the last statement. */
2333 static gimple *
2334 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2335 stmt_vec_info vinfo)
2337 HOST_WIDE_INT i;
2338 tree itype = TREE_TYPE (op);
2339 tree prev_res = op;
2340 gcc_assert (amnt >= 0);
2341 for (i = 0; i < amnt; i++)
2343 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2344 : dest;
2345 gimple *stmt
2346 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2347 prev_res = tmp_var;
2348 if (i < amnt - 1)
2349 append_pattern_def_seq (vinfo, stmt);
2350 else
2351 return stmt;
2353 gcc_unreachable ();
2354 return NULL;
2357 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2358 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2359 the process if necessary. Append the resulting assignment statements
2360 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2361 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2362 left shifts using additions. */
2364 static tree
2365 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2366 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2368 if (integer_zerop (op2)
2369 && (code == LSHIFT_EXPR
2370 || code == PLUS_EXPR))
2372 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2373 return op1;
2376 gimple *stmt;
2377 tree itype = TREE_TYPE (op1);
2378 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2380 if (code == LSHIFT_EXPR
2381 && synth_shift_p)
2383 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2384 stmt_vinfo);
2385 append_pattern_def_seq (stmt_vinfo, stmt);
2386 return tmp_var;
2389 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2390 append_pattern_def_seq (stmt_vinfo, stmt);
2391 return tmp_var;
2394 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2395 and simple arithmetic operations to be vectorized. Record the statements
2396 produced in STMT_VINFO and return the last statement in the sequence or
2397 NULL if it's not possible to synthesize such a multiplication.
2398 This function mirrors the behavior of expand_mult_const in expmed.c but
2399 works on tree-ssa form. */
2401 static gimple *
2402 vect_synth_mult_by_constant (tree op, tree val,
2403 stmt_vec_info stmt_vinfo)
2405 tree itype = TREE_TYPE (op);
2406 machine_mode mode = TYPE_MODE (itype);
2407 struct algorithm alg;
2408 mult_variant variant;
2409 if (!tree_fits_shwi_p (val))
2410 return NULL;
2412 /* Multiplication synthesis by shifts, adds and subs can introduce
2413 signed overflow where the original operation didn't. Perform the
2414 operations on an unsigned type and cast back to avoid this.
2415 In the future we may want to relax this for synthesis algorithms
2416 that we can prove do not cause unexpected overflow. */
2417 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2419 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2421 /* Targets that don't support vector shifts but support vector additions
2422 can synthesize shifts that way. */
2423 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2425 HOST_WIDE_INT hwval = tree_to_shwi (val);
2426 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2427 The vectorizer's benefit analysis will decide whether it's beneficial
2428 to do this. */
2429 bool possible = choose_mult_variant (mode, hwval, &alg,
2430 &variant, MAX_COST);
2431 if (!possible)
2432 return NULL;
2434 tree vectype = get_vectype_for_scalar_type (multtype);
2436 if (!vectype
2437 || !target_supports_mult_synth_alg (&alg, variant,
2438 vectype, synth_shift_p))
2439 return NULL;
2441 tree accumulator;
2443 /* Clear out the sequence of statements so we can populate it below. */
2444 gimple *stmt = NULL;
2446 if (cast_to_unsigned_p)
2448 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2449 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2450 append_pattern_def_seq (stmt_vinfo, stmt);
2451 op = tmp_op;
2454 if (alg.op[0] == alg_zero)
2455 accumulator = build_int_cst (multtype, 0);
2456 else
2457 accumulator = op;
2459 bool needs_fixup = (variant == negate_variant)
2460 || (variant == add_variant);
2462 for (int i = 1; i < alg.ops; i++)
2464 tree shft_log = build_int_cst (multtype, alg.log[i]);
2465 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2466 tree tmp_var = NULL_TREE;
2468 switch (alg.op[i])
2470 case alg_shift:
2471 if (synth_shift_p)
2472 stmt
2473 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2474 stmt_vinfo);
2475 else
2476 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2477 shft_log);
2478 break;
2479 case alg_add_t_m2:
2480 tmp_var
2481 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2482 stmt_vinfo, synth_shift_p);
2483 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2484 tmp_var);
2485 break;
2486 case alg_sub_t_m2:
2487 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2488 shft_log, stmt_vinfo,
2489 synth_shift_p);
2490 /* In some algorithms the first step involves zeroing the
2491 accumulator. If subtracting from such an accumulator
2492 just emit the negation directly. */
2493 if (integer_zerop (accumulator))
2494 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2495 else
2496 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2497 tmp_var);
2498 break;
2499 case alg_add_t2_m:
2500 tmp_var
2501 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2502 stmt_vinfo, synth_shift_p);
2503 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2504 break;
2505 case alg_sub_t2_m:
2506 tmp_var
2507 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2508 stmt_vinfo, synth_shift_p);
2509 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2510 break;
2511 case alg_add_factor:
2512 tmp_var
2513 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2514 stmt_vinfo, synth_shift_p);
2515 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2516 tmp_var);
2517 break;
2518 case alg_sub_factor:
2519 tmp_var
2520 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2521 stmt_vinfo, synth_shift_p);
2522 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2523 accumulator);
2524 break;
2525 default:
2526 gcc_unreachable ();
2528 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2529 but rather return it directly. */
2531 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2532 append_pattern_def_seq (stmt_vinfo, stmt);
2533 accumulator = accum_tmp;
2535 if (variant == negate_variant)
2537 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2538 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2539 accumulator = accum_tmp;
2540 if (cast_to_unsigned_p)
2541 append_pattern_def_seq (stmt_vinfo, stmt);
2543 else if (variant == add_variant)
2545 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2546 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2547 accumulator = accum_tmp;
2548 if (cast_to_unsigned_p)
2549 append_pattern_def_seq (stmt_vinfo, stmt);
2551 /* Move back to a signed if needed. */
2552 if (cast_to_unsigned_p)
2554 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2555 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2558 return stmt;
2561 /* Detect multiplication by constant and convert it into a sequence of
2562 shifts and additions, subtractions, negations. We reuse the
2563 choose_mult_variant algorithms from expmed.c
2565 Input/Output:
2567 STMT_VINFO: The stmt from which the pattern search begins,
2568 i.e. the mult stmt.
2570 Output:
2572 * TYPE_OUT: The type of the output of this pattern.
2574 * Return value: A new stmt that will be used to replace
2575 the multiplication. */
2577 static gimple *
2578 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2580 gimple *last_stmt = stmt_vinfo->stmt;
2581 tree oprnd0, oprnd1, vectype, itype;
2582 gimple *pattern_stmt;
2584 if (!is_gimple_assign (last_stmt))
2585 return NULL;
2587 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2588 return NULL;
2590 oprnd0 = gimple_assign_rhs1 (last_stmt);
2591 oprnd1 = gimple_assign_rhs2 (last_stmt);
2592 itype = TREE_TYPE (oprnd0);
2594 if (TREE_CODE (oprnd0) != SSA_NAME
2595 || TREE_CODE (oprnd1) != INTEGER_CST
2596 || !INTEGRAL_TYPE_P (itype)
2597 || !type_has_mode_precision_p (itype))
2598 return NULL;
2600 vectype = get_vectype_for_scalar_type (itype);
2601 if (vectype == NULL_TREE)
2602 return NULL;
2604 /* If the target can handle vectorized multiplication natively,
2605 don't attempt to optimize this. */
2606 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2607 if (mul_optab != unknown_optab)
2609 machine_mode vec_mode = TYPE_MODE (vectype);
2610 int icode = (int) optab_handler (mul_optab, vec_mode);
2611 if (icode != CODE_FOR_nothing)
2612 return NULL;
2615 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2616 if (!pattern_stmt)
2617 return NULL;
2619 /* Pattern detected. */
2620 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2622 *type_out = vectype;
2624 return pattern_stmt;
2627 /* Detect a signed division by a constant that wouldn't be
2628 otherwise vectorized:
2630 type a_t, b_t;
2632 S1 a_t = b_t / N;
2634 where type 'type' is an integral type and N is a constant.
2636 Similarly handle modulo by a constant:
2638 S4 a_t = b_t % N;
2640 Input/Output:
2642 * STMT_VINFO: The stmt from which the pattern search begins,
2643 i.e. the division stmt. S1 is replaced by if N is a power
2644 of two constant and type is signed:
2645 S3 y_t = b_t < 0 ? N - 1 : 0;
2646 S2 x_t = b_t + y_t;
2647 S1' a_t = x_t >> log2 (N);
2649 S4 is replaced if N is a power of two constant and
2650 type is signed by (where *_T temporaries have unsigned type):
2651 S9 y_T = b_t < 0 ? -1U : 0U;
2652 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2653 S7 z_t = (type) z_T;
2654 S6 w_t = b_t + z_t;
2655 S5 x_t = w_t & (N - 1);
2656 S4' a_t = x_t - z_t;
2658 Output:
2660 * TYPE_OUT: The type of the output of this pattern.
2662 * Return value: A new stmt that will be used to replace the division
2663 S1 or modulo S4 stmt. */
2665 static gimple *
2666 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2668 gimple *last_stmt = stmt_vinfo->stmt;
2669 tree oprnd0, oprnd1, vectype, itype, cond;
2670 gimple *pattern_stmt, *def_stmt;
2671 enum tree_code rhs_code;
2672 optab optab;
2673 tree q;
2674 int dummy_int, prec;
2676 if (!is_gimple_assign (last_stmt))
2677 return NULL;
2679 rhs_code = gimple_assign_rhs_code (last_stmt);
2680 switch (rhs_code)
2682 case TRUNC_DIV_EXPR:
2683 case EXACT_DIV_EXPR:
2684 case TRUNC_MOD_EXPR:
2685 break;
2686 default:
2687 return NULL;
2690 oprnd0 = gimple_assign_rhs1 (last_stmt);
2691 oprnd1 = gimple_assign_rhs2 (last_stmt);
2692 itype = TREE_TYPE (oprnd0);
2693 if (TREE_CODE (oprnd0) != SSA_NAME
2694 || TREE_CODE (oprnd1) != INTEGER_CST
2695 || TREE_CODE (itype) != INTEGER_TYPE
2696 || !type_has_mode_precision_p (itype))
2697 return NULL;
2699 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2700 vectype = get_vectype_for_scalar_type (itype);
2701 if (vectype == NULL_TREE)
2702 return NULL;
2704 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2706 /* If the target can handle vectorized division or modulo natively,
2707 don't attempt to optimize this, since native division is likely
2708 to give smaller code. */
2709 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2710 if (optab != unknown_optab)
2712 machine_mode vec_mode = TYPE_MODE (vectype);
2713 int icode = (int) optab_handler (optab, vec_mode);
2714 if (icode != CODE_FOR_nothing)
2715 return NULL;
2719 prec = TYPE_PRECISION (itype);
2720 if (integer_pow2p (oprnd1))
2722 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2723 return NULL;
2725 /* Pattern detected. */
2726 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2728 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2729 build_int_cst (itype, 0));
2730 if (rhs_code == TRUNC_DIV_EXPR
2731 || rhs_code == EXACT_DIV_EXPR)
2733 tree var = vect_recog_temp_ssa_var (itype, NULL);
2734 tree shift;
2735 def_stmt
2736 = gimple_build_assign (var, COND_EXPR, cond,
2737 fold_build2 (MINUS_EXPR, itype, oprnd1,
2738 build_int_cst (itype, 1)),
2739 build_int_cst (itype, 0));
2740 append_pattern_def_seq (stmt_vinfo, def_stmt);
2741 var = vect_recog_temp_ssa_var (itype, NULL);
2742 def_stmt
2743 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2744 gimple_assign_lhs (def_stmt));
2745 append_pattern_def_seq (stmt_vinfo, def_stmt);
2747 shift = build_int_cst (itype, tree_log2 (oprnd1));
2748 pattern_stmt
2749 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2750 RSHIFT_EXPR, var, shift);
2752 else
2754 tree signmask;
2755 if (compare_tree_int (oprnd1, 2) == 0)
2757 signmask = vect_recog_temp_ssa_var (itype, NULL);
2758 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2759 build_int_cst (itype, 1),
2760 build_int_cst (itype, 0));
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2763 else
2765 tree utype
2766 = build_nonstandard_integer_type (prec, 1);
2767 tree vecutype = get_vectype_for_scalar_type (utype);
2768 tree shift
2769 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2770 - tree_log2 (oprnd1));
2771 tree var = vect_recog_temp_ssa_var (utype, NULL);
2773 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2774 build_int_cst (utype, -1),
2775 build_int_cst (utype, 0));
2776 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2777 var = vect_recog_temp_ssa_var (utype, NULL);
2778 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2779 gimple_assign_lhs (def_stmt),
2780 shift);
2781 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2782 signmask = vect_recog_temp_ssa_var (itype, NULL);
2783 def_stmt
2784 = gimple_build_assign (signmask, NOP_EXPR, var);
2785 append_pattern_def_seq (stmt_vinfo, def_stmt);
2787 def_stmt
2788 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2789 PLUS_EXPR, oprnd0, signmask);
2790 append_pattern_def_seq (stmt_vinfo, def_stmt);
2791 def_stmt
2792 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2793 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2794 fold_build2 (MINUS_EXPR, itype, oprnd1,
2795 build_int_cst (itype, 1)));
2796 append_pattern_def_seq (stmt_vinfo, def_stmt);
2798 pattern_stmt
2799 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2800 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2801 signmask);
2804 *type_out = vectype;
2805 return pattern_stmt;
2808 if (prec > HOST_BITS_PER_WIDE_INT
2809 || integer_zerop (oprnd1))
2810 return NULL;
2812 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2813 return NULL;
2815 if (TYPE_UNSIGNED (itype))
2817 unsigned HOST_WIDE_INT mh, ml;
2818 int pre_shift, post_shift;
2819 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2820 & GET_MODE_MASK (itype_mode));
2821 tree t1, t2, t3, t4;
2823 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2824 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2825 return NULL;
2827 /* Find a suitable multiplier and right shift count
2828 instead of multiplying with D. */
2829 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2831 /* If the suggested multiplier is more than SIZE bits, we can do better
2832 for even divisors, using an initial right shift. */
2833 if (mh != 0 && (d & 1) == 0)
2835 pre_shift = ctz_or_zero (d);
2836 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2837 &ml, &post_shift, &dummy_int);
2838 gcc_assert (!mh);
2840 else
2841 pre_shift = 0;
2843 if (mh != 0)
2845 if (post_shift - 1 >= prec)
2846 return NULL;
2848 /* t1 = oprnd0 h* ml;
2849 t2 = oprnd0 - t1;
2850 t3 = t2 >> 1;
2851 t4 = t1 + t3;
2852 q = t4 >> (post_shift - 1); */
2853 t1 = vect_recog_temp_ssa_var (itype, NULL);
2854 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2855 build_int_cst (itype, ml));
2856 append_pattern_def_seq (stmt_vinfo, def_stmt);
2858 t2 = vect_recog_temp_ssa_var (itype, NULL);
2859 def_stmt
2860 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2861 append_pattern_def_seq (stmt_vinfo, def_stmt);
2863 t3 = vect_recog_temp_ssa_var (itype, NULL);
2864 def_stmt
2865 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2866 append_pattern_def_seq (stmt_vinfo, def_stmt);
2868 t4 = vect_recog_temp_ssa_var (itype, NULL);
2869 def_stmt
2870 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2872 if (post_shift != 1)
2874 append_pattern_def_seq (stmt_vinfo, def_stmt);
2876 q = vect_recog_temp_ssa_var (itype, NULL);
2877 pattern_stmt
2878 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2879 build_int_cst (itype, post_shift - 1));
2881 else
2883 q = t4;
2884 pattern_stmt = def_stmt;
2887 else
2889 if (pre_shift >= prec || post_shift >= prec)
2890 return NULL;
2892 /* t1 = oprnd0 >> pre_shift;
2893 t2 = t1 h* ml;
2894 q = t2 >> post_shift; */
2895 if (pre_shift)
2897 t1 = vect_recog_temp_ssa_var (itype, NULL);
2898 def_stmt
2899 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2900 build_int_cst (NULL, pre_shift));
2901 append_pattern_def_seq (stmt_vinfo, def_stmt);
2903 else
2904 t1 = oprnd0;
2906 t2 = vect_recog_temp_ssa_var (itype, NULL);
2907 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2908 build_int_cst (itype, ml));
2910 if (post_shift)
2912 append_pattern_def_seq (stmt_vinfo, def_stmt);
2914 q = vect_recog_temp_ssa_var (itype, NULL);
2915 def_stmt
2916 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2917 build_int_cst (itype, post_shift));
2919 else
2920 q = t2;
2922 pattern_stmt = def_stmt;
2925 else
2927 unsigned HOST_WIDE_INT ml;
2928 int post_shift;
2929 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2930 unsigned HOST_WIDE_INT abs_d;
2931 bool add = false;
2932 tree t1, t2, t3, t4;
2934 /* Give up for -1. */
2935 if (d == -1)
2936 return NULL;
2938 /* Since d might be INT_MIN, we have to cast to
2939 unsigned HOST_WIDE_INT before negating to avoid
2940 undefined signed overflow. */
2941 abs_d = (d >= 0
2942 ? (unsigned HOST_WIDE_INT) d
2943 : - (unsigned HOST_WIDE_INT) d);
2945 /* n rem d = n rem -d */
2946 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2948 d = abs_d;
2949 oprnd1 = build_int_cst (itype, abs_d);
2951 else if (HOST_BITS_PER_WIDE_INT >= prec
2952 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2953 /* This case is not handled correctly below. */
2954 return NULL;
2956 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2957 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2959 add = true;
2960 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2962 if (post_shift >= prec)
2963 return NULL;
2965 /* t1 = oprnd0 h* ml; */
2966 t1 = vect_recog_temp_ssa_var (itype, NULL);
2967 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2968 build_int_cst (itype, ml));
2970 if (add)
2972 /* t2 = t1 + oprnd0; */
2973 append_pattern_def_seq (stmt_vinfo, def_stmt);
2974 t2 = vect_recog_temp_ssa_var (itype, NULL);
2975 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2977 else
2978 t2 = t1;
2980 if (post_shift)
2982 /* t3 = t2 >> post_shift; */
2983 append_pattern_def_seq (stmt_vinfo, def_stmt);
2984 t3 = vect_recog_temp_ssa_var (itype, NULL);
2985 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2986 build_int_cst (itype, post_shift));
2988 else
2989 t3 = t2;
2991 wide_int oprnd0_min, oprnd0_max;
2992 int msb = 1;
2993 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2995 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2996 msb = 0;
2997 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2998 msb = -1;
3001 if (msb == 0 && d >= 0)
3003 /* q = t3; */
3004 q = t3;
3005 pattern_stmt = def_stmt;
3007 else
3009 /* t4 = oprnd0 >> (prec - 1);
3010 or if we know from VRP that oprnd0 >= 0
3011 t4 = 0;
3012 or if we know from VRP that oprnd0 < 0
3013 t4 = -1; */
3014 append_pattern_def_seq (stmt_vinfo, def_stmt);
3015 t4 = vect_recog_temp_ssa_var (itype, NULL);
3016 if (msb != 1)
3017 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3018 build_int_cst (itype, msb));
3019 else
3020 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3021 build_int_cst (itype, prec - 1));
3022 append_pattern_def_seq (stmt_vinfo, def_stmt);
3024 /* q = t3 - t4; or q = t4 - t3; */
3025 q = vect_recog_temp_ssa_var (itype, NULL);
3026 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3027 d < 0 ? t3 : t4);
3031 if (rhs_code == TRUNC_MOD_EXPR)
3033 tree r, t1;
3035 /* We divided. Now finish by:
3036 t1 = q * oprnd1;
3037 r = oprnd0 - t1; */
3038 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3040 t1 = vect_recog_temp_ssa_var (itype, NULL);
3041 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3042 append_pattern_def_seq (stmt_vinfo, def_stmt);
3044 r = vect_recog_temp_ssa_var (itype, NULL);
3045 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3048 /* Pattern detected. */
3049 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3051 *type_out = vectype;
3052 return pattern_stmt;
3055 /* Function vect_recog_mixed_size_cond_pattern
3057 Try to find the following pattern:
3059 type x_t, y_t;
3060 TYPE a_T, b_T, c_T;
3061 loop:
3062 S1 a_T = x_t CMP y_t ? b_T : c_T;
3064 where type 'TYPE' is an integral type which has different size
3065 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3066 than 'type', the constants need to fit into an integer type
3067 with the same width as 'type') or results of conversion from 'type'.
3069 Input:
3071 * STMT_VINFO: The stmt from which the pattern search begins.
3073 Output:
3075 * TYPE_OUT: The type of the output of this pattern.
3077 * Return value: A new stmt that will be used to replace the pattern.
3078 Additionally a def_stmt is added.
3080 a_it = x_t CMP y_t ? b_it : c_it;
3081 a_T = (TYPE) a_it; */
3083 static gimple *
3084 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3086 gimple *last_stmt = stmt_vinfo->stmt;
3087 tree cond_expr, then_clause, else_clause;
3088 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3089 gimple *pattern_stmt, *def_stmt;
3090 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3091 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3092 bool promotion;
3093 tree comp_scalar_type;
3095 if (!is_gimple_assign (last_stmt)
3096 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3097 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3098 return NULL;
3100 cond_expr = gimple_assign_rhs1 (last_stmt);
3101 then_clause = gimple_assign_rhs2 (last_stmt);
3102 else_clause = gimple_assign_rhs3 (last_stmt);
3104 if (!COMPARISON_CLASS_P (cond_expr))
3105 return NULL;
3107 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3108 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3109 if (comp_vectype == NULL_TREE)
3110 return NULL;
3112 type = gimple_expr_type (last_stmt);
3113 if (types_compatible_p (type, comp_scalar_type)
3114 || ((TREE_CODE (then_clause) != INTEGER_CST
3115 || TREE_CODE (else_clause) != INTEGER_CST)
3116 && !INTEGRAL_TYPE_P (comp_scalar_type))
3117 || !INTEGRAL_TYPE_P (type))
3118 return NULL;
3120 if ((TREE_CODE (then_clause) != INTEGER_CST
3121 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3122 &def_stmt0, &promotion))
3123 || (TREE_CODE (else_clause) != INTEGER_CST
3124 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3125 &def_stmt1, &promotion)))
3126 return NULL;
3128 if (orig_type0 && orig_type1
3129 && !types_compatible_p (orig_type0, orig_type1))
3130 return NULL;
3132 if (orig_type0)
3134 if (!types_compatible_p (orig_type0, comp_scalar_type))
3135 return NULL;
3136 then_clause = gimple_assign_rhs1 (def_stmt0);
3137 itype = orig_type0;
3140 if (orig_type1)
3142 if (!types_compatible_p (orig_type1, comp_scalar_type))
3143 return NULL;
3144 else_clause = gimple_assign_rhs1 (def_stmt1);
3145 itype = orig_type1;
3149 HOST_WIDE_INT cmp_mode_size
3150 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3152 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3153 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3154 return NULL;
3156 vectype = get_vectype_for_scalar_type (type);
3157 if (vectype == NULL_TREE)
3158 return NULL;
3160 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3161 return NULL;
3163 if (itype == NULL_TREE)
3164 itype = build_nonstandard_integer_type (cmp_mode_size,
3165 TYPE_UNSIGNED (type));
3167 if (itype == NULL_TREE
3168 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3169 return NULL;
3171 vecitype = get_vectype_for_scalar_type (itype);
3172 if (vecitype == NULL_TREE)
3173 return NULL;
3175 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3176 return NULL;
3178 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3180 if ((TREE_CODE (then_clause) == INTEGER_CST
3181 && !int_fits_type_p (then_clause, itype))
3182 || (TREE_CODE (else_clause) == INTEGER_CST
3183 && !int_fits_type_p (else_clause, itype)))
3184 return NULL;
3187 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3188 COND_EXPR, unshare_expr (cond_expr),
3189 fold_convert (itype, then_clause),
3190 fold_convert (itype, else_clause));
3191 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3192 NOP_EXPR, gimple_assign_lhs (def_stmt));
3194 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3195 *type_out = vectype;
3197 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3199 return pattern_stmt;
3203 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3204 true if bool VAR can and should be optimized that way. Assume it shouldn't
3205 in case it's a result of a comparison which can be directly vectorized into
3206 a vector comparison. Fills in STMTS with all stmts visited during the
3207 walk. */
3209 static bool
3210 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3212 tree rhs1;
3213 enum tree_code rhs_code;
3215 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3216 if (!def_stmt_info)
3217 return false;
3219 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3220 if (!def_stmt)
3221 return false;
3223 if (stmts.contains (def_stmt))
3224 return true;
3226 rhs1 = gimple_assign_rhs1 (def_stmt);
3227 rhs_code = gimple_assign_rhs_code (def_stmt);
3228 switch (rhs_code)
3230 case SSA_NAME:
3231 if (! check_bool_pattern (rhs1, vinfo, stmts))
3232 return false;
3233 break;
3235 CASE_CONVERT:
3236 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3237 return false;
3238 if (! check_bool_pattern (rhs1, vinfo, stmts))
3239 return false;
3240 break;
3242 case BIT_NOT_EXPR:
3243 if (! check_bool_pattern (rhs1, vinfo, stmts))
3244 return false;
3245 break;
3247 case BIT_AND_EXPR:
3248 case BIT_IOR_EXPR:
3249 case BIT_XOR_EXPR:
3250 if (! check_bool_pattern (rhs1, vinfo, stmts)
3251 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3252 return false;
3253 break;
3255 default:
3256 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3258 tree vecitype, comp_vectype;
3260 /* If the comparison can throw, then is_gimple_condexpr will be
3261 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3262 if (stmt_could_throw_p (def_stmt))
3263 return false;
3265 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3266 if (comp_vectype == NULL_TREE)
3267 return false;
3269 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3270 if (mask_type
3271 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3272 return false;
3274 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3276 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3277 tree itype
3278 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3279 vecitype = get_vectype_for_scalar_type (itype);
3280 if (vecitype == NULL_TREE)
3281 return false;
3283 else
3284 vecitype = comp_vectype;
3285 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3286 return false;
3288 else
3289 return false;
3290 break;
3293 bool res = stmts.add (def_stmt);
3294 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3295 gcc_assert (!res);
3297 return true;
3301 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3302 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3303 pattern sequence. */
3305 static tree
3306 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3308 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3309 NOP_EXPR, var);
3310 append_pattern_def_seq (stmt_info, cast_stmt,
3311 get_vectype_for_scalar_type (type));
3312 return gimple_assign_lhs (cast_stmt);
3315 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3316 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3317 type, OUT_TYPE is the desired final integer type of the whole pattern.
3318 STMT_INFO is the info of the pattern root and is where pattern stmts should
3319 be associated with. DEFS is a map of pattern defs. */
3321 static void
3322 adjust_bool_pattern (tree var, tree out_type,
3323 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3325 gimple *stmt = SSA_NAME_DEF_STMT (var);
3326 enum tree_code rhs_code, def_rhs_code;
3327 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3328 location_t loc;
3329 gimple *pattern_stmt, *def_stmt;
3330 tree trueval = NULL_TREE;
3332 rhs1 = gimple_assign_rhs1 (stmt);
3333 rhs2 = gimple_assign_rhs2 (stmt);
3334 rhs_code = gimple_assign_rhs_code (stmt);
3335 loc = gimple_location (stmt);
3336 switch (rhs_code)
3338 case SSA_NAME:
3339 CASE_CONVERT:
3340 irhs1 = *defs.get (rhs1);
3341 itype = TREE_TYPE (irhs1);
3342 pattern_stmt
3343 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3344 SSA_NAME, irhs1);
3345 break;
3347 case BIT_NOT_EXPR:
3348 irhs1 = *defs.get (rhs1);
3349 itype = TREE_TYPE (irhs1);
3350 pattern_stmt
3351 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3352 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3353 break;
3355 case BIT_AND_EXPR:
3356 /* Try to optimize x = y & (a < b ? 1 : 0); into
3357 x = (a < b ? y : 0);
3359 E.g. for:
3360 bool a_b, b_b, c_b;
3361 TYPE d_T;
3363 S1 a_b = x1 CMP1 y1;
3364 S2 b_b = x2 CMP2 y2;
3365 S3 c_b = a_b & b_b;
3366 S4 d_T = (TYPE) c_b;
3368 we would normally emit:
3370 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3371 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3372 S3' c_T = a_T & b_T;
3373 S4' d_T = c_T;
3375 but we can save one stmt by using the
3376 result of one of the COND_EXPRs in the other COND_EXPR and leave
3377 BIT_AND_EXPR stmt out:
3379 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3380 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3381 S4' f_T = c_T;
3383 At least when VEC_COND_EXPR is implemented using masks
3384 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3385 computes the comparison masks and ands it, in one case with
3386 all ones vector, in the other case with a vector register.
3387 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3388 often more expensive. */
3389 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3390 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3391 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3393 irhs1 = *defs.get (rhs1);
3394 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3395 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3396 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3398 rhs_code = def_rhs_code;
3399 rhs1 = def_rhs1;
3400 rhs2 = gimple_assign_rhs2 (def_stmt);
3401 trueval = irhs1;
3402 goto do_compare;
3404 else
3405 irhs2 = *defs.get (rhs2);
3406 goto and_ior_xor;
3408 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3409 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3410 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3412 irhs2 = *defs.get (rhs2);
3413 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3414 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3415 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3417 rhs_code = def_rhs_code;
3418 rhs1 = def_rhs1;
3419 rhs2 = gimple_assign_rhs2 (def_stmt);
3420 trueval = irhs2;
3421 goto do_compare;
3423 else
3424 irhs1 = *defs.get (rhs1);
3425 goto and_ior_xor;
3427 /* FALLTHRU */
3428 case BIT_IOR_EXPR:
3429 case BIT_XOR_EXPR:
3430 irhs1 = *defs.get (rhs1);
3431 irhs2 = *defs.get (rhs2);
3432 and_ior_xor:
3433 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3434 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3436 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3437 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3438 int out_prec = TYPE_PRECISION (out_type);
3439 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3440 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3441 stmt_info);
3442 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3443 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3444 stmt_info);
3445 else
3447 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3448 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3451 itype = TREE_TYPE (irhs1);
3452 pattern_stmt
3453 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3454 rhs_code, irhs1, irhs2);
3455 break;
3457 default:
3458 do_compare:
3459 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3460 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3461 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3462 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3463 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3465 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3466 itype
3467 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3469 else
3470 itype = TREE_TYPE (rhs1);
3471 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3472 if (trueval == NULL_TREE)
3473 trueval = build_int_cst (itype, 1);
3474 else
3475 gcc_checking_assert (useless_type_conversion_p (itype,
3476 TREE_TYPE (trueval)));
3477 pattern_stmt
3478 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3479 COND_EXPR, cond_expr, trueval,
3480 build_int_cst (itype, 0));
3481 break;
3484 gimple_set_location (pattern_stmt, loc);
3485 append_pattern_def_seq (stmt_info, pattern_stmt,
3486 get_vectype_for_scalar_type (itype));
3487 defs.put (var, gimple_assign_lhs (pattern_stmt));
3490 /* Comparison function to qsort a vector of gimple stmts after UID. */
3492 static int
3493 sort_after_uid (const void *p1, const void *p2)
3495 const gimple *stmt1 = *(const gimple * const *)p1;
3496 const gimple *stmt2 = *(const gimple * const *)p2;
3497 return gimple_uid (stmt1) - gimple_uid (stmt2);
3500 /* Create pattern stmts for all stmts participating in the bool pattern
3501 specified by BOOL_STMT_SET and its root STMT with the desired type
3502 OUT_TYPE. Return the def of the pattern root. */
3504 static tree
3505 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3506 tree out_type, gimple *stmt)
3508 /* Gather original stmts in the bool pattern in their order of appearance
3509 in the IL. */
3510 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3511 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3512 i != bool_stmt_set.end (); ++i)
3513 bool_stmts.quick_push (*i);
3514 bool_stmts.qsort (sort_after_uid);
3516 /* Now process them in that order, producing pattern stmts. */
3517 hash_map <tree, tree> defs;
3518 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3519 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3520 out_type, vinfo_for_stmt (stmt), defs);
3522 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3523 gimple *pattern_stmt
3524 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3525 return gimple_assign_lhs (pattern_stmt);
3528 /* Helper for search_type_for_mask. */
3530 static tree
3531 search_type_for_mask_1 (tree var, vec_info *vinfo,
3532 hash_map<gimple *, tree> &cache)
3534 tree rhs1;
3535 enum tree_code rhs_code;
3536 tree res = NULL_TREE, res2;
3538 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3539 return NULL_TREE;
3541 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3542 if (!def_stmt_info)
3543 return NULL_TREE;
3545 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3546 if (!def_stmt)
3547 return NULL_TREE;
3549 tree *c = cache.get (def_stmt);
3550 if (c)
3551 return *c;
3553 rhs_code = gimple_assign_rhs_code (def_stmt);
3554 rhs1 = gimple_assign_rhs1 (def_stmt);
3556 switch (rhs_code)
3558 case SSA_NAME:
3559 case BIT_NOT_EXPR:
3560 CASE_CONVERT:
3561 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3562 break;
3564 case BIT_AND_EXPR:
3565 case BIT_IOR_EXPR:
3566 case BIT_XOR_EXPR:
3567 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3568 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3569 cache);
3570 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3571 res = res2;
3572 break;
3574 default:
3575 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3577 tree comp_vectype, mask_type;
3579 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3581 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3582 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3583 vinfo, cache);
3584 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3585 res = res2;
3586 break;
3589 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3590 if (comp_vectype == NULL_TREE)
3592 res = NULL_TREE;
3593 break;
3596 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3597 if (!mask_type
3598 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3600 res = NULL_TREE;
3601 break;
3604 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3605 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3607 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3608 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3610 else
3611 res = TREE_TYPE (rhs1);
3615 cache.put (def_stmt, res);
3616 return res;
3619 /* Return the proper type for converting bool VAR into
3620 an integer value or NULL_TREE if no such type exists.
3621 The type is chosen so that converted value has the
3622 same number of elements as VAR's vector type. */
3624 static tree
3625 search_type_for_mask (tree var, vec_info *vinfo)
3627 hash_map<gimple *, tree> cache;
3628 return search_type_for_mask_1 (var, vinfo, cache);
3631 /* Function vect_recog_bool_pattern
3633 Try to find pattern like following:
3635 bool a_b, b_b, c_b, d_b, e_b;
3636 TYPE f_T;
3637 loop:
3638 S1 a_b = x1 CMP1 y1;
3639 S2 b_b = x2 CMP2 y2;
3640 S3 c_b = a_b & b_b;
3641 S4 d_b = x3 CMP3 y3;
3642 S5 e_b = c_b | d_b;
3643 S6 f_T = (TYPE) e_b;
3645 where type 'TYPE' is an integral type. Or a similar pattern
3646 ending in
3648 S6 f_Y = e_b ? r_Y : s_Y;
3650 as results from if-conversion of a complex condition.
3652 Input:
3654 * STMT_VINFO: The stmt at the end from which the pattern
3655 search begins, i.e. cast of a bool to
3656 an integer type.
3658 Output:
3660 * TYPE_OUT: The type of the output of this pattern.
3662 * Return value: A new stmt that will be used to replace the pattern.
3664 Assuming size of TYPE is the same as size of all comparisons
3665 (otherwise some casts would be added where needed), the above
3666 sequence we create related pattern stmts:
3667 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3668 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3669 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3670 S5' e_T = c_T | d_T;
3671 S6' f_T = e_T;
3673 Instead of the above S3' we could emit:
3674 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3675 S3' c_T = a_T | b_T;
3676 but the above is more efficient. */
3678 static gimple *
3679 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3681 gimple *last_stmt = stmt_vinfo->stmt;
3682 enum tree_code rhs_code;
3683 tree var, lhs, rhs, vectype;
3684 vec_info *vinfo = stmt_vinfo->vinfo;
3685 gimple *pattern_stmt;
3687 if (!is_gimple_assign (last_stmt))
3688 return NULL;
3690 var = gimple_assign_rhs1 (last_stmt);
3691 lhs = gimple_assign_lhs (last_stmt);
3693 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3694 return NULL;
3696 hash_set<gimple *> bool_stmts;
3698 rhs_code = gimple_assign_rhs_code (last_stmt);
3699 if (CONVERT_EXPR_CODE_P (rhs_code))
3701 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3702 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3703 return NULL;
3704 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3705 if (vectype == NULL_TREE)
3706 return NULL;
3708 if (check_bool_pattern (var, vinfo, bool_stmts))
3710 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3711 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3712 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3713 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3714 else
3715 pattern_stmt
3716 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3718 else
3720 tree type = search_type_for_mask (var, vinfo);
3721 tree cst0, cst1, tmp;
3723 if (!type)
3724 return NULL;
3726 /* We may directly use cond with narrowed type to avoid
3727 multiple cond exprs with following result packing and
3728 perform single cond with packed mask instead. In case
3729 of widening we better make cond first and then extract
3730 results. */
3731 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3732 type = TREE_TYPE (lhs);
3734 cst0 = build_int_cst (type, 0);
3735 cst1 = build_int_cst (type, 1);
3736 tmp = vect_recog_temp_ssa_var (type, NULL);
3737 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3739 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3741 tree new_vectype = get_vectype_for_scalar_type (type);
3742 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3744 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3745 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3749 *type_out = vectype;
3750 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3752 return pattern_stmt;
3754 else if (rhs_code == COND_EXPR
3755 && TREE_CODE (var) == SSA_NAME)
3757 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3758 if (vectype == NULL_TREE)
3759 return NULL;
3761 /* Build a scalar type for the boolean result that when
3762 vectorized matches the vector type of the result in
3763 size and number of elements. */
3764 unsigned prec
3765 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3766 TYPE_VECTOR_SUBPARTS (vectype));
3768 tree type
3769 = build_nonstandard_integer_type (prec,
3770 TYPE_UNSIGNED (TREE_TYPE (var)));
3771 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3772 return NULL;
3774 if (!check_bool_pattern (var, vinfo, bool_stmts))
3775 return NULL;
3777 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3779 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3780 pattern_stmt
3781 = gimple_build_assign (lhs, COND_EXPR,
3782 build2 (NE_EXPR, boolean_type_node,
3783 rhs, build_int_cst (type, 0)),
3784 gimple_assign_rhs2 (last_stmt),
3785 gimple_assign_rhs3 (last_stmt));
3786 *type_out = vectype;
3787 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3789 return pattern_stmt;
3791 else if (rhs_code == SSA_NAME
3792 && STMT_VINFO_DATA_REF (stmt_vinfo))
3794 stmt_vec_info pattern_stmt_info;
3795 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3796 gcc_assert (vectype != NULL_TREE);
3797 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3798 return NULL;
3800 if (check_bool_pattern (var, vinfo, bool_stmts))
3801 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3802 else
3804 tree type = search_type_for_mask (var, vinfo);
3805 tree cst0, cst1, new_vectype;
3807 if (!type)
3808 return NULL;
3810 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3811 type = TREE_TYPE (vectype);
3813 cst0 = build_int_cst (type, 0);
3814 cst1 = build_int_cst (type, 1);
3815 new_vectype = get_vectype_for_scalar_type (type);
3817 rhs = vect_recog_temp_ssa_var (type, NULL);
3818 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3819 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3822 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3823 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3825 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3826 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3827 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3828 rhs = rhs2;
3830 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3831 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3832 STMT_VINFO_DATA_REF (pattern_stmt_info)
3833 = STMT_VINFO_DATA_REF (stmt_vinfo);
3834 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3835 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3836 *type_out = vectype;
3837 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3839 return pattern_stmt;
3841 else
3842 return NULL;
3846 /* A helper for vect_recog_mask_conversion_pattern. Build
3847 conversion of MASK to a type suitable for masking VECTYPE.
3848 Built statement gets required vectype and is appended to
3849 a pattern sequence of STMT_VINFO.
3851 Return converted mask. */
3853 static tree
3854 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3856 gimple *stmt;
3857 tree masktype, tmp;
3859 masktype = build_same_sized_truth_vector_type (vectype);
3860 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3861 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3862 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3864 return tmp;
3868 /* Function vect_recog_mask_conversion_pattern
3870 Try to find statements which require boolean type
3871 converison. Additional conversion statements are
3872 added to handle such cases. For example:
3874 bool m_1, m_2, m_3;
3875 int i_4, i_5;
3876 double d_6, d_7;
3877 char c_1, c_2, c_3;
3879 S1 m_1 = i_4 > i_5;
3880 S2 m_2 = d_6 < d_7;
3881 S3 m_3 = m_1 & m_2;
3882 S4 c_1 = m_3 ? c_2 : c_3;
3884 Will be transformed into:
3886 S1 m_1 = i_4 > i_5;
3887 S2 m_2 = d_6 < d_7;
3888 S3'' m_2' = (_Bool[bitsize=32])m_2
3889 S3' m_3' = m_1 & m_2';
3890 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3891 S4' c_1' = m_3'' ? c_2 : c_3; */
3893 static gimple *
3894 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3896 gimple *last_stmt = stmt_vinfo->stmt;
3897 enum tree_code rhs_code;
3898 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3899 tree vectype1, vectype2;
3900 stmt_vec_info pattern_stmt_info;
3901 vec_info *vinfo = stmt_vinfo->vinfo;
3903 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3904 if (is_gimple_call (last_stmt)
3905 && gimple_call_internal_p (last_stmt))
3907 gcall *pattern_stmt;
3909 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3910 int mask_argno = internal_fn_mask_index (ifn);
3911 if (mask_argno < 0)
3912 return NULL;
3914 bool store_p = internal_store_fn_p (ifn);
3915 if (store_p)
3917 int rhs_index = internal_fn_stored_value_index (ifn);
3918 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3919 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3921 else
3923 lhs = gimple_call_lhs (last_stmt);
3924 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3927 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3928 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3929 if (!mask_arg_type)
3930 return NULL;
3931 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3933 if (!vectype1 || !vectype2
3934 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3935 TYPE_VECTOR_SUBPARTS (vectype2)))
3936 return NULL;
3938 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3940 auto_vec<tree, 8> args;
3941 unsigned int nargs = gimple_call_num_args (last_stmt);
3942 args.safe_grow (nargs);
3943 for (unsigned int i = 0; i < nargs; ++i)
3944 args[i] = ((int) i == mask_argno
3945 ? tmp
3946 : gimple_call_arg (last_stmt, i));
3947 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3949 if (!store_p)
3951 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3952 gimple_call_set_lhs (pattern_stmt, lhs);
3954 gimple_call_set_nothrow (pattern_stmt, true);
3956 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3957 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3959 STMT_VINFO_DATA_REF (pattern_stmt_info)
3960 = STMT_VINFO_DATA_REF (stmt_vinfo);
3961 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3962 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3963 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
3964 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
3967 *type_out = vectype1;
3968 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3970 return pattern_stmt;
3973 if (!is_gimple_assign (last_stmt))
3974 return NULL;
3976 gimple *pattern_stmt;
3977 lhs = gimple_assign_lhs (last_stmt);
3978 rhs1 = gimple_assign_rhs1 (last_stmt);
3979 rhs_code = gimple_assign_rhs_code (last_stmt);
3981 /* Check for cond expression requiring mask conversion. */
3982 if (rhs_code == COND_EXPR)
3984 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3986 if (TREE_CODE (rhs1) == SSA_NAME)
3988 rhs1_type = search_type_for_mask (rhs1, vinfo);
3989 if (!rhs1_type)
3990 return NULL;
3992 else if (COMPARISON_CLASS_P (rhs1))
3994 /* Check whether we're comparing scalar booleans and (if so)
3995 whether a better mask type exists than the mask associated
3996 with boolean-sized elements. This avoids unnecessary packs
3997 and unpacks if the booleans are set from comparisons of
3998 wider types. E.g. in:
4000 int x1, x2, x3, x4, y1, y1;
4002 bool b1 = (x1 == x2);
4003 bool b2 = (x3 == x4);
4004 ... = b1 == b2 ? y1 : y2;
4006 it is better for b1 and b2 to use the mask type associated
4007 with int elements rather bool (byte) elements. */
4008 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4009 if (!rhs1_type)
4010 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4012 else
4013 return NULL;
4015 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4017 if (!vectype1 || !vectype2)
4018 return NULL;
4020 /* Continue if a conversion is needed. Also continue if we have
4021 a comparison whose vector type would normally be different from
4022 VECTYPE2 when considered in isolation. In that case we'll
4023 replace the comparison with an SSA name (so that we can record
4024 its vector type) and behave as though the comparison was an SSA
4025 name from the outset. */
4026 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4027 TYPE_VECTOR_SUBPARTS (vectype2))
4028 && (TREE_CODE (rhs1) == SSA_NAME
4029 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4030 return NULL;
4032 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4033 in place, we can handle it in vectorizable_condition. This avoids
4034 unnecessary promotion stmts and increased vectorization factor. */
4035 if (COMPARISON_CLASS_P (rhs1)
4036 && INTEGRAL_TYPE_P (rhs1_type)
4037 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4038 TYPE_VECTOR_SUBPARTS (vectype2)))
4040 enum vect_def_type dt;
4041 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4042 && dt == vect_external_def
4043 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4044 && (dt == vect_external_def
4045 || dt == vect_constant_def))
4047 tree wide_scalar_type = build_nonstandard_integer_type
4048 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4049 TYPE_UNSIGNED (rhs1_type));
4050 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4051 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4052 return NULL;
4056 /* If rhs1 is a comparison we need to move it into a
4057 separate statement. */
4058 if (TREE_CODE (rhs1) != SSA_NAME)
4060 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4061 pattern_stmt = gimple_build_assign (tmp, rhs1);
4062 rhs1 = tmp;
4063 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4066 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4067 TYPE_VECTOR_SUBPARTS (vectype2)))
4068 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4069 else
4070 tmp = rhs1;
4072 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4073 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4074 gimple_assign_rhs2 (last_stmt),
4075 gimple_assign_rhs3 (last_stmt));
4077 *type_out = vectype1;
4078 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4080 return pattern_stmt;
4083 /* Now check for binary boolean operations requiring conversion for
4084 one of operands. */
4085 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4086 return NULL;
4088 if (rhs_code != BIT_IOR_EXPR
4089 && rhs_code != BIT_XOR_EXPR
4090 && rhs_code != BIT_AND_EXPR
4091 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4092 return NULL;
4094 rhs2 = gimple_assign_rhs2 (last_stmt);
4096 rhs1_type = search_type_for_mask (rhs1, vinfo);
4097 rhs2_type = search_type_for_mask (rhs2, vinfo);
4099 if (!rhs1_type || !rhs2_type
4100 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4101 return NULL;
4103 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4105 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4106 if (!vectype1)
4107 return NULL;
4108 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4110 else
4112 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4113 if (!vectype1)
4114 return NULL;
4115 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4118 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4119 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4121 *type_out = vectype1;
4122 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4124 return pattern_stmt;
4127 /* STMT is a load or store. If the load or store is conditional, return
4128 the boolean condition under which it occurs, otherwise return null. */
4130 static tree
4131 vect_get_load_store_mask (gimple *stmt)
4133 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4135 gcc_assert (gimple_assign_single_p (def_assign));
4136 return NULL_TREE;
4139 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4141 internal_fn ifn = gimple_call_internal_fn (def_call);
4142 int mask_index = internal_fn_mask_index (ifn);
4143 return gimple_call_arg (def_call, mask_index);
4146 gcc_unreachable ();
4149 /* Return the scalar offset type that an internal gather/scatter function
4150 should use. GS_INFO describes the gather/scatter operation. */
4152 static tree
4153 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4155 tree offset_type = TREE_TYPE (gs_info->offset);
4156 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4158 /* Enforced by vect_check_gather_scatter. */
4159 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4160 gcc_assert (element_bits >= offset_bits);
4162 /* If the offset is narrower than the elements, extend it according
4163 to its sign. */
4164 if (element_bits > offset_bits)
4165 return build_nonstandard_integer_type (element_bits,
4166 TYPE_UNSIGNED (offset_type));
4168 return offset_type;
4171 /* Return MASK if MASK is suitable for masking an operation on vectors
4172 of type VECTYPE, otherwise convert it into such a form and return
4173 the result. Associate any conversion statements with STMT_INFO's
4174 pattern. */
4176 static tree
4177 vect_convert_mask_for_vectype (tree mask, tree vectype,
4178 stmt_vec_info stmt_info, vec_info *vinfo)
4180 tree mask_type = search_type_for_mask (mask, vinfo);
4181 if (mask_type)
4183 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4184 if (mask_vectype
4185 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4186 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4187 mask = build_mask_conversion (mask, vectype, stmt_info);
4189 return mask;
4192 /* Return the equivalent of:
4194 fold_convert (TYPE, VALUE)
4196 with the expectation that the operation will be vectorized.
4197 If new statements are needed, add them as pattern statements
4198 to STMT_INFO. */
4200 static tree
4201 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4203 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4204 return value;
4206 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4207 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4208 append_pattern_def_seq (stmt_info, conversion,
4209 get_vectype_for_scalar_type (type));
4210 return new_value;
4213 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4214 internal function. Return the final statement on success and set
4215 *TYPE_OUT to the vector type being loaded or stored.
4217 This function only handles gathers and scatters that were recognized
4218 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4220 static gimple *
4221 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4223 /* Currently we only support this for loop vectorization. */
4224 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4225 if (!loop_vinfo)
4226 return NULL;
4228 /* Make sure that we're looking at a gather load or scatter store. */
4229 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4230 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4231 return NULL;
4233 /* Get the boolean that controls whether the load or store happens.
4234 This is null if the operation is unconditional. */
4235 gimple *stmt = stmt_info->stmt;
4236 tree mask = vect_get_load_store_mask (stmt);
4238 /* Make sure that the target supports an appropriate internal
4239 function for the gather/scatter operation. */
4240 gather_scatter_info gs_info;
4241 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4242 || gs_info.decl)
4243 return NULL;
4245 /* Convert the mask to the right form. */
4246 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4247 if (mask)
4248 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4249 loop_vinfo);
4251 /* Get the invariant base and non-invariant offset, converting the
4252 latter to the same width as the vector elements. */
4253 tree base = gs_info.base;
4254 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4255 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4256 stmt_info);
4258 /* Build the new pattern statement. */
4259 tree scale = size_int (gs_info.scale);
4260 gcall *pattern_stmt;
4261 if (DR_IS_READ (dr))
4263 if (mask != NULL)
4264 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4265 offset, scale, mask);
4266 else
4267 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4268 offset, scale);
4269 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4270 gimple_call_set_lhs (pattern_stmt, load_lhs);
4272 else
4274 tree rhs = vect_get_store_rhs (stmt);
4275 if (mask != NULL)
4276 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4277 base, offset, scale, rhs,
4278 mask);
4279 else
4280 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4281 base, offset, scale, rhs);
4283 gimple_call_set_nothrow (pattern_stmt, true);
4285 /* Copy across relevant vectorization info and associate DR with the
4286 new pattern statement instead of the original statement. */
4287 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4288 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4289 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4290 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4291 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4292 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4294 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4295 *type_out = vectype;
4296 vect_pattern_detected ("gather/scatter pattern", stmt);
4298 return pattern_stmt;
4301 /* Return true if TYPE is a non-boolean integer type. These are the types
4302 that we want to consider for narrowing. */
4304 static bool
4305 vect_narrowable_type_p (tree type)
4307 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4310 /* Return true if the operation given by CODE can be truncated to N bits
4311 when only N bits of the output are needed. This is only true if bit N+1
4312 of the inputs has no effect on the low N bits of the result. */
4314 static bool
4315 vect_truncatable_operation_p (tree_code code)
4317 switch (code)
4319 case PLUS_EXPR:
4320 case MINUS_EXPR:
4321 case MULT_EXPR:
4322 case BIT_AND_EXPR:
4323 case BIT_IOR_EXPR:
4324 case BIT_XOR_EXPR:
4325 case COND_EXPR:
4326 return true;
4328 default:
4329 return false;
4333 /* Record that STMT_INFO could be changed from operating on TYPE to
4334 operating on a type with the precision and sign given by PRECISION
4335 and SIGN respectively. PRECISION is an arbitrary bit precision;
4336 it might not be a whole number of bytes. */
4338 static void
4339 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4340 unsigned int precision, signop sign)
4342 /* Round the precision up to a whole number of bytes. */
4343 precision = vect_element_precision (precision);
4344 if (precision < TYPE_PRECISION (type)
4345 && (!stmt_info->operation_precision
4346 || stmt_info->operation_precision > precision))
4348 stmt_info->operation_precision = precision;
4349 stmt_info->operation_sign = sign;
4353 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4354 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4355 is an arbitrary bit precision; it might not be a whole number of bytes. */
4357 static void
4358 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4359 unsigned int min_input_precision)
4361 /* This operation in isolation only requires the inputs to have
4362 MIN_INPUT_PRECISION of precision, However, that doesn't mean
4363 that MIN_INPUT_PRECISION is a natural precision for the chain
4364 as a whole. E.g. consider something like:
4366 unsigned short *x, *y;
4367 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4369 The right shift can be done on unsigned chars, and only requires the
4370 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4371 approach would mean turning a natural chain of single-vector unsigned
4372 short operations into one that truncates "*x" and then extends
4373 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4374 operation and one vector for each unsigned char operation.
4375 This would be a significant pessimization.
4377 Instead only propagate the maximum of this precision and the precision
4378 required by the users of the result. This means that we don't pessimize
4379 the case above but continue to optimize things like:
4381 unsigned char *y;
4382 unsigned short *x;
4383 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4385 Here we would truncate two vectors of *x to a single vector of
4386 unsigned chars and use single-vector unsigned char operations for
4387 everything else, rather than doing two unsigned short copies of
4388 "(*x & 0xf0) >> 4" and then truncating the result. */
4389 min_input_precision = MAX (min_input_precision,
4390 stmt_info->min_output_precision);
4392 if (min_input_precision < TYPE_PRECISION (type)
4393 && (!stmt_info->min_input_precision
4394 || stmt_info->min_input_precision > min_input_precision))
4395 stmt_info->min_input_precision = min_input_precision;
4398 /* Subroutine of vect_determine_min_output_precision. Return true if
4399 we can calculate a reduced number of output bits for STMT_INFO,
4400 whose result is LHS. */
4402 static bool
4403 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4405 /* Take the maximum precision required by users of the result. */
4406 vec_info *vinfo = stmt_info->vinfo;
4407 unsigned int precision = 0;
4408 imm_use_iterator iter;
4409 use_operand_p use;
4410 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4412 gimple *use_stmt = USE_STMT (use);
4413 if (is_gimple_debug (use_stmt))
4414 continue;
4415 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4416 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4417 return false;
4418 precision = MAX (precision, use_stmt_info->min_input_precision);
4421 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
4424 precision);
4425 dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
4426 dump_printf (MSG_NOTE, " are significant\n");
4428 stmt_info->min_output_precision = precision;
4429 return true;
4432 /* Calculate min_output_precision for STMT_INFO. */
4434 static void
4435 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4437 /* We're only interested in statements with a narrowable result. */
4438 tree lhs = gimple_get_lhs (stmt_info->stmt);
4439 if (!lhs
4440 || TREE_CODE (lhs) != SSA_NAME
4441 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4442 return;
4444 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4445 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4448 /* Use range information to decide whether STMT (described by STMT_INFO)
4449 could be done in a narrower type. This is effectively a forward
4450 propagation, since it uses context-independent information that applies
4451 to all users of an SSA name. */
4453 static void
4454 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4456 tree lhs = gimple_assign_lhs (stmt);
4457 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4458 return;
4460 tree type = TREE_TYPE (lhs);
4461 if (!vect_narrowable_type_p (type))
4462 return;
4464 /* First see whether we have any useful range information for the result. */
4465 unsigned int precision = TYPE_PRECISION (type);
4466 signop sign = TYPE_SIGN (type);
4467 wide_int min_value, max_value;
4468 if (!vect_get_range_info (lhs, &min_value, &max_value))
4469 return;
4471 tree_code code = gimple_assign_rhs_code (stmt);
4472 unsigned int nops = gimple_num_ops (stmt);
4474 if (!vect_truncatable_operation_p (code))
4475 /* Check that all relevant input operands are compatible, and update
4476 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4477 for (unsigned int i = 1; i < nops; ++i)
4479 tree op = gimple_op (stmt, i);
4480 if (TREE_CODE (op) == INTEGER_CST)
4482 /* Don't require the integer to have RHS_TYPE (which it might
4483 not for things like shift amounts, etc.), but do require it
4484 to fit the type. */
4485 if (!int_fits_type_p (op, type))
4486 return;
4488 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4489 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4491 else if (TREE_CODE (op) == SSA_NAME)
4493 /* Ignore codes that don't take uniform arguments. */
4494 if (!types_compatible_p (TREE_TYPE (op), type))
4495 return;
4497 wide_int op_min_value, op_max_value;
4498 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4499 return;
4501 min_value = wi::min (min_value, op_min_value, sign);
4502 max_value = wi::max (max_value, op_max_value, sign);
4504 else
4505 return;
4508 /* Try to switch signed types for unsigned types if we can.
4509 This is better for two reasons. First, unsigned ops tend
4510 to be cheaper than signed ops. Second, it means that we can
4511 handle things like:
4513 signed char c;
4514 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4518 signed char c;
4519 unsigned short res_1 = (unsigned short) c & 0xff00;
4520 int res = (int) res_1;
4522 where the intermediate result res_1 has unsigned rather than
4523 signed type. */
4524 if (sign == SIGNED && !wi::neg_p (min_value))
4525 sign = UNSIGNED;
4527 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4528 unsigned int precision1 = wi::min_precision (min_value, sign);
4529 unsigned int precision2 = wi::min_precision (max_value, sign);
4530 unsigned int value_precision = MAX (precision1, precision2);
4531 if (value_precision >= precision)
4532 return;
4534 if (dump_enabled_p ())
4536 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4537 " without loss of precision: ",
4538 sign == SIGNED ? "signed" : "unsigned",
4539 value_precision);
4540 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4543 vect_set_operation_type (stmt_info, type, value_precision, sign);
4544 vect_set_min_input_precision (stmt_info, type, value_precision);
4547 /* Use information about the users of STMT's result to decide whether
4548 STMT (described by STMT_INFO) could be done in a narrower type.
4549 This is effectively a backward propagation. */
4551 static void
4552 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4554 tree_code code = gimple_assign_rhs_code (stmt);
4555 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4556 tree type = TREE_TYPE (gimple_op (stmt, opno));
4557 if (!vect_narrowable_type_p (type))
4558 return;
4560 unsigned int precision = TYPE_PRECISION (type);
4561 unsigned int operation_precision, min_input_precision;
4562 switch (code)
4564 CASE_CONVERT:
4565 /* Only the bits that contribute to the output matter. Don't change
4566 the precision of the operation itself. */
4567 operation_precision = precision;
4568 min_input_precision = stmt_info->min_output_precision;
4569 break;
4571 case LSHIFT_EXPR:
4572 case RSHIFT_EXPR:
4574 tree shift = gimple_assign_rhs2 (stmt);
4575 if (TREE_CODE (shift) != INTEGER_CST
4576 || !wi::ltu_p (wi::to_widest (shift), precision))
4577 return;
4578 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4579 if (code == LSHIFT_EXPR)
4581 /* We need CONST_SHIFT fewer bits of the input. */
4582 operation_precision = stmt_info->min_output_precision;
4583 min_input_precision = (MAX (operation_precision, const_shift)
4584 - const_shift);
4586 else
4588 /* We need CONST_SHIFT extra bits to do the operation. */
4589 operation_precision = (stmt_info->min_output_precision
4590 + const_shift);
4591 min_input_precision = operation_precision;
4593 break;
4596 default:
4597 if (vect_truncatable_operation_p (code))
4599 /* Input bit N has no effect on output bits N-1 and lower. */
4600 operation_precision = stmt_info->min_output_precision;
4601 min_input_precision = operation_precision;
4602 break;
4604 return;
4607 if (operation_precision < precision)
4609 if (dump_enabled_p ())
4611 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4612 " without affecting users: ",
4613 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4614 operation_precision);
4615 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4617 vect_set_operation_type (stmt_info, type, operation_precision,
4618 TYPE_SIGN (type));
4620 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4623 /* Handle vect_determine_precisions for STMT_INFO, given that we
4624 have already done so for the users of its result. */
4626 void
4627 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4629 vect_determine_min_output_precision (stmt_info);
4630 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4632 vect_determine_precisions_from_range (stmt_info, stmt);
4633 vect_determine_precisions_from_users (stmt_info, stmt);
4637 /* Walk backwards through the vectorizable region to determine the
4638 values of these fields:
4640 - min_output_precision
4641 - min_input_precision
4642 - operation_precision
4643 - operation_sign. */
4645 void
4646 vect_determine_precisions (vec_info *vinfo)
4648 DUMP_VECT_SCOPE ("vect_determine_precisions");
4650 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4652 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4653 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4654 unsigned int nbbs = loop->num_nodes;
4656 for (unsigned int i = 0; i < nbbs; i++)
4658 basic_block bb = bbs[nbbs - i - 1];
4659 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4660 !gsi_end_p (si); gsi_prev (&si))
4661 vect_determine_stmt_precisions
4662 (vinfo->lookup_stmt (gsi_stmt (si)));
4665 else
4667 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4668 gimple_stmt_iterator si = bb_vinfo->region_end;
4669 gimple *stmt;
4672 if (!gsi_stmt (si))
4673 si = gsi_last_bb (bb_vinfo->bb);
4674 else
4675 gsi_prev (&si);
4676 stmt = gsi_stmt (si);
4677 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4678 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4679 vect_determine_stmt_precisions (stmt_info);
4681 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4685 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4687 struct vect_recog_func
4689 vect_recog_func_ptr fn;
4690 const char *name;
4693 /* Note that ordering matters - the first pattern matching on a stmt is
4694 taken which means usually the more complex one needs to preceed the
4695 less comples onex (widen_sum only after dot_prod or sad for example). */
4696 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4697 { vect_recog_over_widening_pattern, "over_widening" },
4698 /* Must come after over_widening, which narrows the shift as much as
4699 possible beforehand. */
4700 { vect_recog_average_pattern, "average" },
4701 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4702 { vect_recog_widen_mult_pattern, "widen_mult" },
4703 { vect_recog_dot_prod_pattern, "dot_prod" },
4704 { vect_recog_sad_pattern, "sad" },
4705 { vect_recog_widen_sum_pattern, "widen_sum" },
4706 { vect_recog_pow_pattern, "pow" },
4707 { vect_recog_widen_shift_pattern, "widen_shift" },
4708 { vect_recog_rotate_pattern, "rotate" },
4709 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4710 { vect_recog_divmod_pattern, "divmod" },
4711 { vect_recog_mult_pattern, "mult" },
4712 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4713 { vect_recog_bool_pattern, "bool" },
4714 /* This must come before mask conversion, and includes the parts
4715 of mask conversion that are needed for gather and scatter
4716 internal functions. */
4717 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4718 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4721 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4723 /* Mark statements that are involved in a pattern. */
4725 static inline void
4726 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4727 tree pattern_vectype)
4729 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4730 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4732 bool old_pattern_p = is_pattern_stmt_p (orig_stmt_info);
4733 if (old_pattern_p)
4735 /* We're replacing a statement in an existing pattern definition
4736 sequence. */
4737 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_NOTE, vect_location,
4740 "replacing earlier pattern ");
4741 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_stmt, 0);
4744 /* To keep the book-keeping simple, just swap the lhs of the
4745 old and new statements, so that the old one has a valid but
4746 unused lhs. */
4747 tree old_lhs = gimple_get_lhs (orig_stmt);
4748 gimple_set_lhs (orig_stmt, gimple_get_lhs (pattern_stmt));
4749 gimple_set_lhs (pattern_stmt, old_lhs);
4751 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4754 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4757 /* Switch to the statement that ORIG replaces. */
4758 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4760 /* We shouldn't be replacing the main pattern statement. */
4761 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) != orig_stmt);
4764 if (def_seq)
4765 for (gimple_stmt_iterator si = gsi_start (def_seq);
4766 !gsi_end_p (si); gsi_next (&si))
4767 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4769 if (old_pattern_p)
4771 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4773 /* Insert all the new pattern statements before the original one. */
4774 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4775 gimple_stmt_iterator gsi = gsi_for_stmt (orig_stmt, orig_def_seq);
4776 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4777 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4779 /* Remove the pattern statement that this new pattern replaces. */
4780 gsi_remove (&gsi, false);
4782 else
4783 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4786 /* Function vect_pattern_recog_1
4788 Input:
4789 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4790 computation pattern.
4791 STMT: A stmt from which the pattern search should start.
4793 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4794 a sequence of statements that has the same functionality and can be
4795 used to replace STMT. It returns the last statement in the sequence
4796 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4797 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4798 statement, having first checked that the target supports the new operation
4799 in that type.
4801 This function also does some bookkeeping, as explained in the documentation
4802 for vect_recog_pattern. */
4804 static void
4805 vect_pattern_recog_1 (vect_recog_func *recog_func, gimple_stmt_iterator si)
4807 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4808 stmt_vec_info stmt_info;
4809 loop_vec_info loop_vinfo;
4810 tree pattern_vectype;
4812 /* If this statement has already been replaced with pattern statements,
4813 leave the original statement alone, since the first match wins.
4814 Instead try to match against the definition statements that feed
4815 the main pattern statement. */
4816 stmt_info = vinfo_for_stmt (stmt);
4817 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4819 gimple_stmt_iterator gsi;
4820 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4821 !gsi_end_p (gsi); gsi_next (&gsi))
4822 vect_pattern_recog_1 (recog_func, gsi);
4823 return;
4826 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4827 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4828 if (!pattern_stmt)
4830 /* Clear any half-formed pattern definition sequence. */
4831 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4832 return;
4835 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4836 gcc_assert (pattern_vectype);
4838 /* Found a vectorizable pattern. */
4839 if (dump_enabled_p ())
4841 dump_printf_loc (MSG_NOTE, vect_location,
4842 "%s pattern recognized: ", recog_func->name);
4843 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4846 /* Mark the stmts that are involved in the pattern. */
4847 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4849 /* Patterns cannot be vectorized using SLP, because they change the order of
4850 computation. */
4851 if (loop_vinfo)
4853 unsigned ix, ix2;
4854 stmt_vec_info *elem_ptr;
4855 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4856 elem_ptr, *elem_ptr == stmt_info);
4861 /* Function vect_pattern_recog
4863 Input:
4864 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4865 computation idioms.
4867 Output - for each computation idiom that is detected we create a new stmt
4868 that provides the same functionality and that can be vectorized. We
4869 also record some information in the struct_stmt_info of the relevant
4870 stmts, as explained below:
4872 At the entry to this function we have the following stmts, with the
4873 following initial value in the STMT_VINFO fields:
4875 stmt in_pattern_p related_stmt vec_stmt
4876 S1: a_i = .... - - -
4877 S2: a_2 = ..use(a_i).. - - -
4878 S3: a_1 = ..use(a_2).. - - -
4879 S4: a_0 = ..use(a_1).. - - -
4880 S5: ... = ..use(a_0).. - - -
4882 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4883 represented by a single stmt. We then:
4884 - create a new stmt S6 equivalent to the pattern (the stmt is not
4885 inserted into the code)
4886 - fill in the STMT_VINFO fields as follows:
4888 in_pattern_p related_stmt vec_stmt
4889 S1: a_i = .... - - -
4890 S2: a_2 = ..use(a_i).. - - -
4891 S3: a_1 = ..use(a_2).. - - -
4892 S4: a_0 = ..use(a_1).. true S6 -
4893 '---> S6: a_new = .... - S4 -
4894 S5: ... = ..use(a_0).. - - -
4896 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4897 to each other through the RELATED_STMT field).
4899 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4900 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4901 remain irrelevant unless used by stmts other than S4.
4903 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4904 (because they are marked as irrelevant). It will vectorize S6, and record
4905 a pointer to the new vector stmt VS6 from S6 (as usual).
4906 S4 will be skipped, and S5 will be vectorized as usual:
4908 in_pattern_p related_stmt vec_stmt
4909 S1: a_i = .... - - -
4910 S2: a_2 = ..use(a_i).. - - -
4911 S3: a_1 = ..use(a_2).. - - -
4912 > VS6: va_new = .... - - -
4913 S4: a_0 = ..use(a_1).. true S6 VS6
4914 '---> S6: a_new = .... - S4 VS6
4915 > VS5: ... = ..vuse(va_new).. - - -
4916 S5: ... = ..use(a_0).. - - -
4918 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4919 elsewhere), and we'll end up with:
4921 VS6: va_new = ....
4922 VS5: ... = ..vuse(va_new)..
4924 In case of more than one pattern statements, e.g., widen-mult with
4925 intermediate type:
4927 S1 a_t = ;
4928 S2 a_T = (TYPE) a_t;
4929 '--> S3: a_it = (interm_type) a_t;
4930 S4 prod_T = a_T * CONST;
4931 '--> S5: prod_T' = a_it w* CONST;
4933 there may be other users of a_T outside the pattern. In that case S2 will
4934 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4935 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4936 be recorded in S3. */
4938 void
4939 vect_pattern_recog (vec_info *vinfo)
4941 struct loop *loop;
4942 basic_block *bbs;
4943 unsigned int nbbs;
4944 gimple_stmt_iterator si;
4945 unsigned int i, j;
4947 vect_determine_precisions (vinfo);
4949 DUMP_VECT_SCOPE ("vect_pattern_recog");
4951 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4953 loop = LOOP_VINFO_LOOP (loop_vinfo);
4954 bbs = LOOP_VINFO_BBS (loop_vinfo);
4955 nbbs = loop->num_nodes;
4957 /* Scan through the loop stmts, applying the pattern recognition
4958 functions starting at each stmt visited: */
4959 for (i = 0; i < nbbs; i++)
4961 basic_block bb = bbs[i];
4962 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4963 /* Scan over all generic vect_recog_xxx_pattern functions. */
4964 for (j = 0; j < NUM_PATTERNS; j++)
4965 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si);
4968 else
4970 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4971 for (si = bb_vinfo->region_begin;
4972 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4974 gimple *stmt = gsi_stmt (si);
4975 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4976 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4977 continue;
4979 /* Scan over all generic vect_recog_xxx_pattern functions. */
4980 for (j = 0; j < NUM_PATTERNS; j++)
4981 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si);