[7/n] PR85694: Add a vect_pattern_detected helper
[official-gcc.git] / gcc / tree-vect-patterns.c
blob 3d7f866fe375364e41e866fec957ee3b2b821e76
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Pattern recognition functions */
51 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
52 tree *);
53 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
54 tree *);
55 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
58 tree *);
59 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
61 tree *);
62 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
63 tree *, tree *);
64 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
65 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
66 tree *, tree *);
67 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
68 tree *, tree *);
70 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
71 tree *, tree *);
73 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
74 tree *, tree *);
75 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
76 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
77 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
78 tree *);
80 struct vect_recog_func
82 vect_recog_func_ptr fn;
83 const char *name;
86 /* Note that ordering matters - the first pattern matching on a stmt
87   is taken, which means that usually the more complex one needs to precede
88   the less complex ones (widen_sum only after dot_prod or sad for example). */
89 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
90 { vect_recog_widen_mult_pattern, "widen_mult" },
91 { vect_recog_dot_prod_pattern, "dot_prod" },
92 { vect_recog_sad_pattern, "sad" },
93 { vect_recog_widen_sum_pattern, "widen_sum" },
94 { vect_recog_pow_pattern, "pow" },
95 { vect_recog_widen_shift_pattern, "widen_shift" },
96 { vect_recog_over_widening_pattern, "over_widening" },
97 { vect_recog_rotate_pattern, "rotate" },
98 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
99 { vect_recog_divmod_pattern, "divmod" },
100 { vect_recog_mult_pattern, "mult" },
101 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
102 { vect_recog_bool_pattern, "bool" },
103 /* This must come before mask conversion, and includes the parts
104 of mask conversion that are needed for gather and scatter
105 internal functions. */
106 { vect_recog_gather_scatter_pattern, "gather_scatter" },
107 { vect_recog_mask_conversion_pattern, "mask_conversion" }
110 /* Report that we've found an instance of pattern PATTERN in
111 statement STMT. */
113 static void
114 vect_pattern_detected (const char *name, gimple *stmt)
116 if (dump_enabled_p ())
118 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
119 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
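/* For illustration, a typical call from one of the recognizers below
   (this exact call appears in vect_recog_dot_prod_pattern):

     vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

   which, when dumping is enabled, prints
   "vect_recog_dot_prod_pattern: detected: " followed by LAST_STMT.  */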
123 static inline void
124 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
126 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
127 stmt);
130 static inline void
131 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
133 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
134 append_pattern_def_seq (stmt_info, stmt);
137 /* Check whether STMT2 is in the same loop or basic block as STMT1.
138 Which of the two applies depends on whether we're currently doing
139 loop-based or basic-block-based vectorization, as determined by
140 the vinfo_for_stmt for STMT1 (which must be defined).
142 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
143 to be defined as well. */
145 static bool
146 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
148 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
149 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
152 /* If the LHS of DEF_STMT has a single use, and that statement is
153 in the same loop or basic block, return it. */
155 static gimple *
156 vect_single_imm_use (gimple *def_stmt)
158 tree lhs = gimple_assign_lhs (def_stmt);
159 use_operand_p use_p;
160 gimple *use_stmt;
162 if (!single_imm_use (lhs, &use_p, &use_stmt))
163 return NULL;
165 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
166 return NULL;
168 return use_stmt;
171 /* If OP is defined by a statement that's being considered for vectorization,
172 return information about that statement, otherwise return NULL. */
174 static stmt_vec_info
175 vect_get_internal_def (vec_info *vinfo, tree op)
177 vect_def_type dt;
178 gimple *def_stmt;
179 if (TREE_CODE (op) != SSA_NAME
180 || !vect_is_simple_use (op, vinfo, &def_stmt, &dt)
181 || dt != vect_internal_def)
182 return NULL;
184 return vinfo_for_stmt (def_stmt);
187 /* Check whether NAME, an ssa-name used in USE_STMT,
188 is a result of a type promotion, such that:
189 DEF_STMT: NAME = NOP (name0)
190 If CHECK_SIGN is TRUE, check that either both types are signed or both are
191 unsigned. */
193 static bool
194 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
195 tree *orig_type, gimple **def_stmt, bool *promotion)
197 gimple *dummy_gimple;
198 stmt_vec_info stmt_vinfo;
199 tree type = TREE_TYPE (name);
200 tree oprnd0;
201 enum vect_def_type dt;
203 stmt_vinfo = vinfo_for_stmt (use_stmt);
204 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
205 return false;
207 if (dt != vect_internal_def
208 && dt != vect_external_def && dt != vect_constant_def)
209 return false;
211 if (!*def_stmt)
212 return false;
214 if (dt == vect_internal_def)
216 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
217 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
218 return false;
221 if (!is_gimple_assign (*def_stmt))
222 return false;
224 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
225 return false;
227 oprnd0 = gimple_assign_rhs1 (*def_stmt);
229 *orig_type = TREE_TYPE (oprnd0);
230 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
231 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
232 return false;
234 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
235 *promotion = true;
236 else
237 *promotion = false;
239 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
240 return false;
242 return true;
245 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
246 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
248 static tree
249 vect_recog_temp_ssa_var (tree type, gimple *stmt)
251 return make_temp_ssa_name (type, stmt, "patt");
254 /* Return true if STMT_VINFO describes a reduction for which reassociation
255    is allowed.  If STMT_VINFO is part of a group, assume that it's part of
256 a reduction chain and optimistically assume that all statements
257 except the last allow reassociation. */
259 static bool
260 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
262 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
263 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
264 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
267 /* Function vect_recog_dot_prod_pattern
269 Try to find the following pattern:
271 type x_t, y_t;
272 TYPE1 prod;
273 TYPE2 sum = init;
274 loop:
275 sum_0 = phi <init, sum_1>
276 S1 x_t = ...
277 S2 y_t = ...
278 S3 x_T = (TYPE1) x_t;
279 S4 y_T = (TYPE1) y_t;
280 S5 prod = x_T * y_T;
281 [S6 prod = (TYPE2) prod; #optional]
282 S7 sum_1 = prod + sum_0;
284 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
285    same size as 'TYPE1' or bigger.  This is a special case of a reduction
286 computation.
288 Input:
290 * STMTS: Contains a stmt from which the pattern search begins. In the
291 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
292 will be detected.
294 Output:
296 * TYPE_IN: The type of the input arguments to the pattern.
298 * TYPE_OUT: The type of the output of this pattern.
300 * Return value: A new stmt that will be used to replace the sequence of
301 stmts that constitute the pattern. In this case it will be:
302 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
304 Note: The dot-prod idiom is a widening reduction pattern that is
305 vectorized without preserving all the intermediate results. It
306 produces only N/2 (widened) results (by summing up pairs of
307 intermediate results) rather than all N results. Therefore, we
308 cannot allow this pattern when we want to get all the results and in
309 the correct order (as is the case when this computation is in an
310         inner-loop nested in an outer-loop that is being vectorized).  */
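/* For reference, a minimal C source loop of the kind described above
   (a sketch only; names and types are illustrative):

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];

   Here S3/S4 are the promotions of x[i] and y[i] to int, S5 the multiply
   and S7 the accumulation, which together become DOT_PROD_EXPR.  */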
312 static gimple *
313 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
314 tree *type_out)
316 gimple *stmt, *last_stmt = (*stmts)[0];
317 tree oprnd0, oprnd1;
318 tree oprnd00, oprnd01;
319 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
320 vec_info *vinfo = stmt_vinfo->vinfo;
321 tree type, half_type;
322 gimple *pattern_stmt;
323 tree prod_type;
324 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
325 struct loop *loop;
326 tree var;
327 bool promotion;
329 if (!loop_info)
330 return NULL;
332 loop = LOOP_VINFO_LOOP (loop_info);
334 /* We don't allow changing the order of the computation in the inner-loop
335 when doing outer-loop vectorization. */
336 if (loop && nested_in_vect_loop_p (loop, last_stmt))
337 return NULL;
339 if (!is_gimple_assign (last_stmt))
340 return NULL;
342 type = gimple_expr_type (last_stmt);
344 /* Look for the following pattern
345 DX = (TYPE1) X;
346 DY = (TYPE1) Y;
347 DPROD = DX * DY;
348 DDPROD = (TYPE2) DPROD;
349 sum_1 = DDPROD + sum_0;
350 In which
351 - DX is double the size of X
352 - DY is double the size of Y
353 - DX, DY, DPROD all have the same type
354      - sum is the same size as DPROD or bigger
355 - sum has been recognized as a reduction variable.
357 This is equivalent to:
358 DPROD = X w* Y; #widen mult
359 sum_1 = DPROD w+ sum_0; #widen summation
361 DPROD = X w* Y; #widen mult
362 sum_1 = DPROD + sum_0; #summation
365 /* Starting from LAST_STMT, follow the defs of its uses in search
366 of the above pattern. */
368 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
369 return NULL;
371 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
372 return NULL;
374 if (!vect_reassociating_reduction_p (stmt_vinfo))
375 return NULL;
377 oprnd0 = gimple_assign_rhs1 (last_stmt);
378 oprnd1 = gimple_assign_rhs2 (last_stmt);
379 stmt = last_stmt;
381 gimple *def_stmt;
382 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
383 &promotion)
384 && promotion)
386 stmt = def_stmt;
387 oprnd0 = gimple_assign_rhs1 (stmt);
389 else
390 half_type = type;
392 /* So far so good. Since last_stmt was detected as a (summation) reduction,
393 we know that oprnd1 is the reduction variable (defined by a loop-header
394 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
395 Left to check that oprnd0 is defined by a (widen_)mult_expr */
396 if (TREE_CODE (oprnd0) != SSA_NAME)
397 return NULL;
399 prod_type = half_type;
400 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
401 if (!mult_vinfo)
402 return NULL;
404   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
405 inside the loop (in case we are analyzing an outer-loop). */
406 gassign *mult = dyn_cast <gassign *> (mult_vinfo->stmt);
407 if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
408 return NULL;
409 if (STMT_VINFO_IN_PATTERN_P (mult_vinfo))
411 /* Has been detected as a widening multiplication? */
413 mult = dyn_cast <gassign *> (STMT_VINFO_RELATED_STMT (mult_vinfo));
414 if (!mult || gimple_assign_rhs_code (mult) != WIDEN_MULT_EXPR)
415 return NULL;
416 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)
417 = STMT_VINFO_PATTERN_DEF_SEQ (mult_vinfo);
418 mult_vinfo = vinfo_for_stmt (mult);
419 gcc_assert (mult_vinfo);
420 gcc_assert (STMT_VINFO_DEF_TYPE (mult_vinfo) == vect_internal_def);
421 oprnd00 = gimple_assign_rhs1 (mult);
422 oprnd01 = gimple_assign_rhs2 (mult);
424 else
426 tree half_type0, half_type1;
427 gimple *def_stmt;
428 tree oprnd0, oprnd1;
430 oprnd0 = gimple_assign_rhs1 (mult);
431 oprnd1 = gimple_assign_rhs2 (mult);
432 if (!type_conversion_p (oprnd0, mult, true, &half_type0, &def_stmt,
433 &promotion)
434 || !promotion)
435 return NULL;
436 oprnd00 = gimple_assign_rhs1 (def_stmt);
437 if (!type_conversion_p (oprnd1, mult, true, &half_type1, &def_stmt,
438 &promotion)
439 || !promotion)
440 return NULL;
441 oprnd01 = gimple_assign_rhs1 (def_stmt);
442 if (!types_compatible_p (half_type0, half_type1))
443 return NULL;
444 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
445 return NULL;
448 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
450 half_type = TREE_TYPE (oprnd00);
451 *type_in = half_type;
452 *type_out = type;
454 var = vect_recog_temp_ssa_var (type, NULL);
455 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
456 oprnd00, oprnd01, oprnd1);
458 return pattern_stmt;
462 /* Function vect_recog_sad_pattern
464 Try to find the following Sum of Absolute Difference (SAD) pattern:
466 type x_t, y_t;
467 signed TYPE1 diff, abs_diff;
468 TYPE2 sum = init;
469 loop:
470 sum_0 = phi <init, sum_1>
471 S1 x_t = ...
472 S2 y_t = ...
473 S3 x_T = (TYPE1) x_t;
474 S4 y_T = (TYPE1) y_t;
475 S5 diff = x_T - y_T;
476 S6 abs_diff = ABS_EXPR <diff>;
477 [S7 abs_diff = (TYPE2) abs_diff; #optional]
478 S8 sum_1 = abs_diff + sum_0;
480 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
481    same size as 'TYPE1' or bigger.  This is a special case of a reduction
482 computation.
484 Input:
486 * STMTS: Contains a stmt from which the pattern search begins. In the
487 example, when this function is called with S8, the pattern
488 {S3,S4,S5,S6,S7,S8} will be detected.
490 Output:
492 * TYPE_IN: The type of the input arguments to the pattern.
494 * TYPE_OUT: The type of the output of this pattern.
496 * Return value: A new stmt that will be used to replace the sequence of
497 stmts that constitute the pattern. In this case it will be:
498 SAD_EXPR <x_t, y_t, sum_0>
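/* For reference, a typical C source loop that exposes this pattern
   (a sketch only; names and types are illustrative):

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += __builtin_abs (x[i] - y[i]);

   The subtraction and abs are done in int (S3-S6) and the accumulation in
   int (S8), which together become SAD_EXPR <x_t, y_t, sum_0>.  */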
501 static gimple *
502 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
503 tree *type_out)
505 gimple *last_stmt = (*stmts)[0];
506 tree sad_oprnd0, sad_oprnd1;
507 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
508 vec_info *vinfo = stmt_vinfo->vinfo;
509 tree half_type;
510 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
511 struct loop *loop;
512 bool promotion;
514 if (!loop_info)
515 return NULL;
517 loop = LOOP_VINFO_LOOP (loop_info);
519 /* We don't allow changing the order of the computation in the inner-loop
520 when doing outer-loop vectorization. */
521 if (loop && nested_in_vect_loop_p (loop, last_stmt))
522 return NULL;
524 if (!is_gimple_assign (last_stmt))
525 return NULL;
527 tree sum_type = gimple_expr_type (last_stmt);
529 /* Look for the following pattern
530 DX = (TYPE1) X;
531 DY = (TYPE1) Y;
532 DDIFF = DX - DY;
533 DAD = ABS_EXPR <DDIFF>;
534        DAD = (TYPE2) DAD;
535 sum_1 = DAD + sum_0;
536 In which
537 - DX is at least double the size of X
538 - DY is at least double the size of Y
539 - DX, DY, DDIFF, DAD all have the same type
540      - sum is the same size as DAD or bigger
541 - sum has been recognized as a reduction variable.
543 This is equivalent to:
544 DDIFF = X w- Y; #widen sub
545 DAD = ABS_EXPR <DDIFF>;
546 sum_1 = DAD w+ sum_0; #widen summation
548 DDIFF = X w- Y; #widen sub
549 DAD = ABS_EXPR <DDIFF>;
550 sum_1 = DAD + sum_0; #summation
553 /* Starting from LAST_STMT, follow the defs of its uses in search
554 of the above pattern. */
556 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
557 return NULL;
559 tree plus_oprnd0, plus_oprnd1;
561 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
562 return NULL;
564 if (!vect_reassociating_reduction_p (stmt_vinfo))
565 return NULL;
567 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
568 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
570 /* The type conversion could be promotion, demotion,
571 or just signed -> unsigned. */
572 gimple *def_stmt;
573 if (type_conversion_p (plus_oprnd0, last_stmt, false,
574 &half_type, &def_stmt, &promotion))
575 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
576 else
577 half_type = sum_type;
579 /* So far so good. Since last_stmt was detected as a (summation) reduction,
580 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
581 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
582 Then check that plus_oprnd0 is defined by an abs_expr. */
584 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
585 return NULL;
587 tree abs_type = half_type;
588 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
589 if (!abs_stmt_vinfo)
590 return NULL;
592   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
593 inside the loop (in case we are analyzing an outer-loop). */
594 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
595 if (!abs_stmt
596 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
597 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
598 return NULL;
600 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
601 if (TYPE_UNSIGNED (abs_type))
602 return NULL;
604 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
606 if (TREE_CODE (abs_oprnd) != SSA_NAME)
607 return NULL;
609 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
610 if (!diff_stmt_vinfo)
611 return NULL;
613   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
614 inside the loop (in case we are analyzing an outer-loop). */
615 gassign *diff_stmt = dyn_cast <gassign *> (diff_stmt_vinfo->stmt);
616 if (!diff_stmt || gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
617 return NULL;
619 tree half_type0, half_type1;
621 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
622 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
624 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
625 &half_type0, &def_stmt, &promotion)
626 || !promotion)
627 return NULL;
628 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
630 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
631 &half_type1, &def_stmt, &promotion)
632 || !promotion)
633 return NULL;
634 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
636 if (!types_compatible_p (half_type0, half_type1))
637 return NULL;
638 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
639 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
640 return NULL;
642 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
644 *type_in = TREE_TYPE (sad_oprnd0);
645 *type_out = sum_type;
647 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
648 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
649 sad_oprnd1, plus_oprnd1);
651 return pattern_stmt;
655 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
656 and LSHIFT_EXPR.
658 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
659    we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
661 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
662 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
663    that satisfies the above restrictions, we can perform a widening operation
664    from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
665    with a_it = (interm_type) a_t;  Store such an operation in *WSTMT.  */
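/* A concrete (hypothetical) instance of the intermediate-type case above:
   with HALF_TYPE = char, TYPE = int and CONST_OPRND = 300 for MULT_EXPR,
   300 does not fit char but does fit the intermediate type short, so

     a_T = (TYPE) a_t;

   is replaced by a_it = (interm_type) a_t (returned in *WSTMT), and the
   caller can then widen from short to int instead of from char to int.  */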
667 static bool
668 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
669 tree const_oprnd, tree *oprnd,
670 gimple **wstmt, tree type,
671 tree *half_type, gimple *def_stmt)
673 tree new_type, new_oprnd;
675 if (code != MULT_EXPR && code != LSHIFT_EXPR)
676 return false;
678 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
679 || (code == LSHIFT_EXPR
680 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
681 != 1))
682 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
684 /* CONST_OPRND is a constant of HALF_TYPE. */
685 *oprnd = gimple_assign_rhs1 (def_stmt);
686 return true;
689 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
690 return false;
692 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
693 return false;
695 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
696 a type 2 times bigger than HALF_TYPE. */
697 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
698 TYPE_UNSIGNED (type));
699 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
700 || (code == LSHIFT_EXPR
701 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
702 return false;
704 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
705 *oprnd = gimple_assign_rhs1 (def_stmt);
706 new_oprnd = make_ssa_name (new_type);
707 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
708 *oprnd = new_oprnd;
710 *half_type = new_type;
711 return true;
715 /* Function vect_recog_widen_mult_pattern
717 Try to find the following pattern:
719 type1 a_t;
720 type2 b_t;
721 TYPE a_T, b_T, prod_T;
723 S1 a_t = ;
724 S2 b_t = ;
725 S3 a_T = (TYPE) a_t;
726 S4 b_T = (TYPE) b_t;
727 S5 prod_T = a_T * b_T;
729 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
731 Also detect unsigned cases:
733 unsigned type1 a_t;
734 unsigned type2 b_t;
735 unsigned TYPE u_prod_T;
736 TYPE a_T, b_T, prod_T;
738 S1 a_t = ;
739 S2 b_t = ;
740 S3 a_T = (TYPE) a_t;
741 S4 b_T = (TYPE) b_t;
742 S5 prod_T = a_T * b_T;
743 S6 u_prod_T = (unsigned TYPE) prod_T;
745 and multiplication by constants:
747 type a_t;
748 TYPE a_T, prod_T;
750 S1 a_t = ;
751 S3 a_T = (TYPE) a_t;
752 S5 prod_T = a_T * CONST;
754 A special case of multiplication by constants is when 'TYPE' is 4 times
755 bigger than 'type', but CONST fits an intermediate type 2 times smaller
756 than 'TYPE'. In that case we create an additional pattern stmt for S3
757 to create a variable of the intermediate type, and perform widen-mult
758 on the intermediate type as well:
760 type a_t;
761 interm_type a_it;
762 TYPE a_T, prod_T, prod_T';
764 S1 a_t = ;
765 S3 a_T = (TYPE) a_t;
766 '--> a_it = (interm_type) a_t;
767 S5 prod_T = a_T * CONST;
768 '--> prod_T' = a_it w* CONST;
770 Input/Output:
772 * STMTS: Contains a stmt from which the pattern search begins. In the
773 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
774 is detected. In case of unsigned widen-mult, the original stmt (S5) is
775 replaced with S6 in STMTS. In case of multiplication by a constant
776 of an intermediate type (the last case above), STMTS also contains S3
777 (inserted before S5).
779 Output:
781 * TYPE_IN: The type of the input arguments to the pattern.
783 * TYPE_OUT: The type of the output of this pattern.
785 * Return value: A new stmt that will be used to replace the sequence of
786 stmts that constitute the pattern. In this case it will be:
787 WIDEN_MULT <a_t, b_t>
788 If the result of WIDEN_MULT needs to be converted to a larger type, the
789 returned stmt will be this type conversion stmt.
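/* For reference, a minimal C source loop matching the basic form above
   (a sketch only; names and types are illustrative):

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   Each element becomes a single WIDEN_MULT <a_t, b_t> instead of two
   promotions followed by a full-width multiply.  */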
792 static gimple *
793 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
794 tree *type_in, tree *type_out)
796 gimple *last_stmt = stmts->pop ();
797 gimple *def_stmt0, *def_stmt1;
798 tree oprnd0, oprnd1;
799 tree type, half_type0, half_type1;
800 gimple *new_stmt = NULL, *pattern_stmt = NULL;
801 tree vectype, vecitype;
802 tree var;
803 enum tree_code dummy_code;
804 int dummy_int;
805 vec<tree> dummy_vec;
806 bool op1_ok;
807 bool promotion;
809 if (!is_gimple_assign (last_stmt))
810 return NULL;
812 type = gimple_expr_type (last_stmt);
814 /* Starting from LAST_STMT, follow the defs of its uses in search
815 of the above pattern. */
817 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
818 return NULL;
820 oprnd0 = gimple_assign_rhs1 (last_stmt);
821 oprnd1 = gimple_assign_rhs2 (last_stmt);
823 /* Check argument 0. */
824 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
825 &promotion)
826 || !promotion)
827 return NULL;
828 /* Check argument 1. */
829 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
830 &def_stmt1, &promotion);
832 if (op1_ok && promotion)
834 oprnd0 = gimple_assign_rhs1 (def_stmt0);
835 oprnd1 = gimple_assign_rhs1 (def_stmt1);
837 else
839 if (TREE_CODE (oprnd1) == INTEGER_CST
840 && TREE_CODE (half_type0) == INTEGER_TYPE
841 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
842 &oprnd0, &new_stmt, type,
843 &half_type0, def_stmt0))
845 half_type1 = half_type0;
846 oprnd1 = fold_convert (half_type1, oprnd1);
848 else
849 return NULL;
852 /* If the two arguments have different sizes, convert the one with
853 the smaller type into the larger type. */
854 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
856 /* If we already used up the single-stmt slot give up. */
857 if (new_stmt)
858 return NULL;
860 tree* oprnd = NULL;
861 gimple *def_stmt = NULL;
863 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
865 def_stmt = def_stmt0;
866 half_type0 = half_type1;
867 oprnd = &oprnd0;
869 else
871 def_stmt = def_stmt1;
872 half_type1 = half_type0;
873 oprnd = &oprnd1;
876 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
877 tree new_oprnd = make_ssa_name (half_type0);
878 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
879 *oprnd = new_oprnd;
882 /* Handle unsigned case. Look for
883 S6 u_prod_T = (unsigned TYPE) prod_T;
884 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
885 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
887 gimple *use_stmt;
888 tree use_lhs;
889 tree use_type;
891 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
892 return NULL;
894 use_stmt = vect_single_imm_use (last_stmt);
895 if (!use_stmt || !is_gimple_assign (use_stmt)
896 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
897 return NULL;
899 use_lhs = gimple_assign_lhs (use_stmt);
900 use_type = TREE_TYPE (use_lhs);
901 if (!INTEGRAL_TYPE_P (use_type)
902 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
903 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
904 return NULL;
906 type = use_type;
907 last_stmt = use_stmt;
910 if (!types_compatible_p (half_type0, half_type1))
911 return NULL;
913   /* If TYPE is more than twice the size of HALF_TYPE, we use WIDEN_MULT
914 to get an intermediate result of type ITYPE. In this case we need
915 to build a statement to convert this intermediate result to type TYPE. */
916 tree itype = type;
917 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
918 itype = build_nonstandard_integer_type
919 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
920 TYPE_UNSIGNED (type));
922 /* Pattern detected. */
923 vect_pattern_detected ("vect_recog_widen_mult_pattern", last_stmt);
925 /* Check target support */
926 vectype = get_vectype_for_scalar_type (half_type0);
927 vecitype = get_vectype_for_scalar_type (itype);
928 if (!vectype
929 || !vecitype
930 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
931 vecitype, vectype,
932 &dummy_code, &dummy_code,
933 &dummy_int, &dummy_vec))
934 return NULL;
936 *type_in = vectype;
937 *type_out = get_vectype_for_scalar_type (type);
939 /* Pattern supported. Create a stmt to be used to replace the pattern: */
940 var = vect_recog_temp_ssa_var (itype, NULL);
941 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
943 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
944 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
946 /* If the original two operands have different sizes, we may need to convert
947      the smaller one into the larger type.  If this is the case, at this point
948 the new stmt is already built. */
949 if (new_stmt)
951 append_pattern_def_seq (stmt_vinfo, new_stmt);
952 stmt_vec_info new_stmt_info
953 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
954 set_vinfo_for_stmt (new_stmt, new_stmt_info);
955 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
958   /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
959 the result of the widen-mult operation into type TYPE. */
960 if (itype != type)
962 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
963 stmt_vec_info pattern_stmt_info
964 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
965 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
966 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
967 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
968 NOP_EXPR,
969 gimple_assign_lhs (pattern_stmt));
972 stmts->safe_push (last_stmt);
973 return pattern_stmt;
977 /* Function vect_recog_pow_pattern
979 Try to find the following pattern:
981 x = POW (y, N);
983 with POW being one of pow, powf, powi, powif and N being
984 either 2 or 0.5.
986 Input:
988 * LAST_STMT: A stmt from which the pattern search begins.
990 Output:
992 * TYPE_IN: The type of the input arguments to the pattern.
994 * TYPE_OUT: The type of the output of this pattern.
996 * Return value: A new stmt that will be used to replace the sequence of
997 stmts that constitute the pattern. In this case it will be:
998 x = x * x
1000 x = sqrt (x)
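/* For illustration, the two rewrites this recognizer performs
   (sketches; the calls below stand for any of the pow/powi variants):

     y = pow (x, 2.0);     =>   y = x * x;
     y = pow (x, 0.5);     =>   y = sqrt (x);   (via IFN_SQRT when the
                                                 target supports it)  */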
1003 static gimple *
1004 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1005 tree *type_out)
1007 gimple *last_stmt = (*stmts)[0];
1008 tree base, exp;
1009 gimple *stmt;
1010 tree var;
1012 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1013 return NULL;
1015 switch (gimple_call_combined_fn (last_stmt))
1017 CASE_CFN_POW:
1018 CASE_CFN_POWI:
1019 break;
1021 default:
1022 return NULL;
1025 base = gimple_call_arg (last_stmt, 0);
1026 exp = gimple_call_arg (last_stmt, 1);
1027 if (TREE_CODE (exp) != REAL_CST
1028 && TREE_CODE (exp) != INTEGER_CST)
1030 if (flag_unsafe_math_optimizations
1031 && TREE_CODE (base) == REAL_CST
1032 && !gimple_call_internal_p (last_stmt))
1034 combined_fn log_cfn;
1035 built_in_function exp_bfn;
1036 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1038 case BUILT_IN_POW:
1039 log_cfn = CFN_BUILT_IN_LOG;
1040 exp_bfn = BUILT_IN_EXP;
1041 break;
1042 case BUILT_IN_POWF:
1043 log_cfn = CFN_BUILT_IN_LOGF;
1044 exp_bfn = BUILT_IN_EXPF;
1045 break;
1046 case BUILT_IN_POWL:
1047 log_cfn = CFN_BUILT_IN_LOGL;
1048 exp_bfn = BUILT_IN_EXPL;
1049 break;
1050 default:
1051 return NULL;
1053 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1054 tree exp_decl = builtin_decl_implicit (exp_bfn);
1055 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1056 does that, but if C is a power of 2, we want to use
1057 exp2 (log2 (C) * x) in the non-vectorized version, but for
1058 vectorization we don't have vectorized exp2. */
1059 if (logc
1060 && TREE_CODE (logc) == REAL_CST
1061 && exp_decl
1062 && lookup_attribute ("omp declare simd",
1063 DECL_ATTRIBUTES (exp_decl)))
1065 cgraph_node *node = cgraph_node::get_create (exp_decl);
1066 if (node->simd_clones == NULL)
1068 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1069 || node->definition)
1070 return NULL;
1071 expand_simd_clones (node);
1072 if (node->simd_clones == NULL)
1073 return NULL;
1075 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1076 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1077 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1078 new_pattern_def_seq (stmt_vinfo, g);
1079 *type_in = TREE_TYPE (base);
1080 *type_out = NULL_TREE;
1081 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1082 g = gimple_build_call (exp_decl, 1, def);
1083 gimple_call_set_lhs (g, res);
1084 return g;
1088 return NULL;
1091 /* We now have a pow or powi builtin function call with a constant
1092 exponent. */
1094 *type_out = NULL_TREE;
1096 /* Catch squaring. */
1097 if ((tree_fits_shwi_p (exp)
1098 && tree_to_shwi (exp) == 2)
1099 || (TREE_CODE (exp) == REAL_CST
1100 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1102 *type_in = TREE_TYPE (base);
1104 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1105 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1106 return stmt;
1109 /* Catch square root. */
1110 if (TREE_CODE (exp) == REAL_CST
1111 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1113 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1114 if (*type_in
1115 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1116 OPTIMIZE_FOR_SPEED))
1118 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1119 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1120 gimple_call_set_lhs (stmt, var);
1121 gimple_call_set_nothrow (stmt, true);
1122 return stmt;
1126 return NULL;
1130 /* Function vect_recog_widen_sum_pattern
1132 Try to find the following pattern:
1134 type x_t;
1135 TYPE x_T, sum = init;
1136 loop:
1137 sum_0 = phi <init, sum_1>
1138 S1 x_t = *p;
1139 S2 x_T = (TYPE) x_t;
1140 S3 sum_1 = x_T + sum_0;
1142 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1143 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1144 a special case of a reduction computation.
1146 Input:
1148 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1149 when this function is called with S3, the pattern {S2,S3} will be detected.
1151 Output:
1153 * TYPE_IN: The type of the input arguments to the pattern.
1155 * TYPE_OUT: The type of the output of this pattern.
1157 * Return value: A new stmt that will be used to replace the sequence of
1158 stmts that constitute the pattern. In this case it will be:
1159 WIDEN_SUM <x_t, sum_0>
1161 Note: The widening-sum idiom is a widening reduction pattern that is
1162 vectorized without preserving all the intermediate results. It
1163 produces only N/2 (widened) results (by summing up pairs of
1164 intermediate results) rather than all N results. Therefore, we
1165 cannot allow this pattern when we want to get all the results and in
1166 the correct order (as is the case when this computation is in an
1167         inner-loop nested in an outer-loop that is being vectorized).  */
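/* For reference, a minimal C source loop matching this pattern
   (a sketch only; names and types are illustrative):

     short x[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   The promotion of x[i] (S2) and the accumulation (S3) together become
   WIDEN_SUM <x_t, sum_0>.  */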
1169 static gimple *
1170 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1171 tree *type_out)
1173 gimple *stmt, *last_stmt = (*stmts)[0];
1174 tree oprnd0, oprnd1;
1175 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1176 tree type, half_type;
1177 gimple *pattern_stmt;
1178 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1179 struct loop *loop;
1180 tree var;
1181 bool promotion;
1183 if (!loop_info)
1184 return NULL;
1186 loop = LOOP_VINFO_LOOP (loop_info);
1188 /* We don't allow changing the order of the computation in the inner-loop
1189 when doing outer-loop vectorization. */
1190 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1191 return NULL;
1193 if (!is_gimple_assign (last_stmt))
1194 return NULL;
1196 type = gimple_expr_type (last_stmt);
1198 /* Look for the following pattern
1199 DX = (TYPE) X;
1200 sum_1 = DX + sum_0;
1201 In which DX is at least double the size of X, and sum_1 has been
1202 recognized as a reduction variable.
1205 /* Starting from LAST_STMT, follow the defs of its uses in search
1206 of the above pattern. */
1208 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1209 return NULL;
1211 if (!vect_reassociating_reduction_p (stmt_vinfo))
1212 return NULL;
1214 oprnd0 = gimple_assign_rhs1 (last_stmt);
1215 oprnd1 = gimple_assign_rhs2 (last_stmt);
1217 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1218 we know that oprnd1 is the reduction variable (defined by a loop-header
1219 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1220 Left to check that oprnd0 is defined by a cast from type 'type' to type
1221 'TYPE'. */
1223 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1224 &promotion)
1225 || !promotion)
1226 return NULL;
1228 oprnd0 = gimple_assign_rhs1 (stmt);
1230 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1232 *type_in = half_type;
1233 *type_out = type;
1235 var = vect_recog_temp_ssa_var (type, NULL);
1236 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1238 return pattern_stmt;
1242 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1244 Input:
1245 STMT - a statement to check.
1246 DEF - we support operations with two operands, one of which is constant.
1247 The other operand can be defined by a demotion operation, or by a
1248 previous statement in a sequence of over-promoted operations. In the
1249         latter case DEF is used to replace that operand.  (It is defined by a
1250 pattern statement we created for the previous statement in the
1251 sequence).
1253 Input/output:
1254 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1255 NULL, it's the type of DEF.
1256 STMTS - additional pattern statements. If a pattern statement (type
1257 conversion) is created in this function, its original statement is
1258 added to STMTS.
1260 Output:
1261 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1262 operands to use in the new pattern statement for STMT (will be created
1263 in vect_recog_over_widening_pattern ()).
1264 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1265 statements for STMT: the first one is a type promotion and the second
1266 one is the operation itself. We return the type promotion statement
1267 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1268 the second pattern statement. */
1270 static bool
1271 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1272 tree *op0, tree *op1, gimple **new_def_stmt,
1273 vec<gimple *> *stmts)
1275 enum tree_code code;
1276 tree const_oprnd, oprnd;
1277 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1278 gimple *def_stmt, *new_stmt;
1279 bool first = false;
1280 bool promotion;
1282 *op0 = NULL_TREE;
1283 *op1 = NULL_TREE;
1284 *new_def_stmt = NULL;
1286 if (!is_gimple_assign (stmt))
1287 return false;
1289 code = gimple_assign_rhs_code (stmt);
1290 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1291 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1292 return false;
1294 oprnd = gimple_assign_rhs1 (stmt);
1295 const_oprnd = gimple_assign_rhs2 (stmt);
1296 type = gimple_expr_type (stmt);
1298 if (TREE_CODE (oprnd) != SSA_NAME
1299 || TREE_CODE (const_oprnd) != INTEGER_CST)
1300 return false;
1302 /* If oprnd has other uses besides that in stmt we cannot mark it
1303 as being part of a pattern only. */
1304 if (!has_single_use (oprnd))
1305 return false;
1307 /* If we are in the middle of a sequence, we use DEF from a previous
1308 statement. Otherwise, OPRND has to be a result of type promotion. */
1309 if (*new_type)
1311 half_type = *new_type;
1312 oprnd = def;
1314 else
1316 first = true;
1317 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1318 &promotion)
1319 || !promotion
1320 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1321 return false;
1324 /* Can we perform the operation on a smaller type? */
1325 switch (code)
1327 case BIT_IOR_EXPR:
1328 case BIT_XOR_EXPR:
1329 case BIT_AND_EXPR:
1330 if (!int_fits_type_p (const_oprnd, half_type))
1332 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1333 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1334 return false;
1336 interm_type = build_nonstandard_integer_type (
1337 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1338 if (!int_fits_type_p (const_oprnd, interm_type))
1339 return false;
1342 break;
1344 case LSHIFT_EXPR:
1345 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1346 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1347 return false;
1349 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1350 (e.g., if the original value was char, the shift amount is at most 8
1351 if we want to use short). */
1352 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1353 return false;
1355 interm_type = build_nonstandard_integer_type (
1356 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1358 if (!vect_supportable_shift (code, interm_type))
1359 return false;
1361 break;
1363 case RSHIFT_EXPR:
1364 if (vect_supportable_shift (code, half_type))
1365 break;
1367 /* Try intermediate type - HALF_TYPE is not supported. */
1368 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1369 return false;
1371 interm_type = build_nonstandard_integer_type (
1372 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1374 if (!vect_supportable_shift (code, interm_type))
1375 return false;
1377 break;
1379 default:
1380 gcc_unreachable ();
1383 /* There are four possible cases:
1384 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1385 the first statement in the sequence)
1386 a. The original, HALF_TYPE, is not enough - we replace the promotion
1387 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1388 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1389 promotion.
1390 2. OPRND is defined by a pattern statement we created.
1391 a. Its type is not sufficient for the operation, we create a new stmt:
1392 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1393 this statement in NEW_DEF_STMT, and it is later put in
1394 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1395 b. OPRND is good to use in the new statement. */
1396 if (first)
1398 if (interm_type)
1400 /* Replace the original type conversion HALF_TYPE->TYPE with
1401 HALF_TYPE->INTERM_TYPE. */
1402 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1404 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1405 /* Check if the already created pattern stmt is what we need. */
1406 if (!is_gimple_assign (new_stmt)
1407 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1408 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1409 return false;
1411 stmts->safe_push (def_stmt);
1412 oprnd = gimple_assign_lhs (new_stmt);
1414 else
1416 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1417 oprnd = gimple_assign_rhs1 (def_stmt);
1418 new_oprnd = make_ssa_name (interm_type);
1419 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1420 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1421 stmts->safe_push (def_stmt);
1422 oprnd = new_oprnd;
1425 else
1427 /* Retrieve the operand before the type promotion. */
1428 oprnd = gimple_assign_rhs1 (def_stmt);
1431 else
1433 if (interm_type)
1435 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1436 new_oprnd = make_ssa_name (interm_type);
1437 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1438 oprnd = new_oprnd;
1439 *new_def_stmt = new_stmt;
1442 /* Otherwise, OPRND is already set. */
1445 if (interm_type)
1446 *new_type = interm_type;
1447 else
1448 *new_type = half_type;
1450 *op0 = oprnd;
1451 *op1 = fold_convert (*new_type, const_oprnd);
1453 return true;
1457 /* Try to find a statement or a sequence of statements that can be performed
1458 on a smaller type:
1460 type x_t;
1461 TYPE x_T, res0_T, res1_T;
1462 loop:
1463 S1 x_t = *p;
1464 S2 x_T = (TYPE) x_t;
1465 S3 res0_T = op (x_T, C0);
1466 S4 res1_T = op (res0_T, C1);
1467 S5 ... = () res1_T; - type demotion
1469 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1470 constants.
1471    Check if S3 and S4 can be done on a smaller type than 'TYPE', which can either
1472 be 'type' or some intermediate type. For now, we expect S5 to be a type
1473 demotion operation. We also check that S3 and S4 have only one use. */
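/* For reference, a (hypothetical) source fragment of this shape:

     unsigned char *p, *q;
     *q = (*p << 2) | 3;

   In C the load is promoted to int, the shift and bitwise-or are done in
   int, and the result is demoted back to unsigned char, even though the
   whole computation fits a narrower type.  */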
1475 static gimple *
1476 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1477 tree *type_in, tree *type_out)
1479 gimple *stmt = stmts->pop ();
1480 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1481 *use_stmt = NULL;
1482 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1483 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1484 bool first;
1485 tree type = NULL;
1487 first = true;
1488 while (1)
1490 if (!vinfo_for_stmt (stmt)
1491 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1492 return NULL;
1494 new_def_stmt = NULL;
1495 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1496 &op0, &op1, &new_def_stmt,
1497 stmts))
1499 if (first)
1500 return NULL;
1501 else
1502 break;
1505 /* STMT can be performed on a smaller type. Check its uses. */
1506 use_stmt = vect_single_imm_use (stmt);
1507 if (!use_stmt || !is_gimple_assign (use_stmt))
1508 return NULL;
1510 /* Create pattern statement for STMT. */
1511 vectype = get_vectype_for_scalar_type (new_type);
1512 if (!vectype)
1513 return NULL;
1515 /* We want to collect all the statements for which we create pattern
1516         statements, except for the case when the last statement in the
1517 sequence doesn't have a corresponding pattern statement. In such
1518 case we associate the last pattern statement with the last statement
1519 in the sequence. Therefore, we only add the original statement to
1520 the list if we know that it is not the last. */
1521 if (prev_stmt)
1522 stmts->safe_push (prev_stmt);
1524 var = vect_recog_temp_ssa_var (new_type, NULL);
1525 pattern_stmt
1526 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1527 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1528 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1530 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_NOTE, vect_location,
1533 "created pattern stmt: ");
1534 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1537 type = gimple_expr_type (stmt);
1538 prev_stmt = stmt;
1539 stmt = use_stmt;
1541 first = false;
1544 /* We got a sequence. We expect it to end with a type demotion operation.
1545 Otherwise, we quit (for now). There are three possible cases: the
1546 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1547 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1548 NEW_TYPE differs (we create a new conversion statement). */
1549 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1551 use_lhs = gimple_assign_lhs (use_stmt);
1552 use_type = TREE_TYPE (use_lhs);
1553       /* Support only type demotion or signedness change.  */
1554 if (!INTEGRAL_TYPE_P (use_type)
1555 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1556 return NULL;
1558 /* Check that NEW_TYPE is not bigger than the conversion result. */
1559 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1560 return NULL;
1562 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1563 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1565 /* Create NEW_TYPE->USE_TYPE conversion. */
1566 new_oprnd = make_ssa_name (use_type);
1567 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1568 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1570 *type_in = get_vectype_for_scalar_type (new_type);
1571 *type_out = get_vectype_for_scalar_type (use_type);
1573 /* We created a pattern statement for the last statement in the
1574 sequence, so we don't need to associate it with the pattern
1575 statement created for PREV_STMT. Therefore, we add PREV_STMT
1576 to the list in order to mark it later in vect_pattern_recog_1. */
1577 if (prev_stmt)
1578 stmts->safe_push (prev_stmt);
1580 else
1582 if (prev_stmt)
1583 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1584 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1586 *type_in = vectype;
1587 *type_out = NULL_TREE;
1590 stmts->safe_push (use_stmt);
1592 else
1593 /* TODO: support general case, create a conversion to the correct type. */
1594 return NULL;
1596 /* Pattern detected. */
1597 vect_pattern_detected ("vect_recog_over_widening_pattern", stmts->last ());
1599 return pattern_stmt;
1602 /* Detect widening shift pattern:
1604 type a_t;
1605 TYPE a_T, res_T;
1607 S1 a_t = ;
1608 S2 a_T = (TYPE) a_t;
1609 S3 res_T = a_T << CONST;
1611 where type 'TYPE' is at least double the size of type 'type'.
1613 Also detect cases where the shift result is immediately converted
1614 to another type 'result_type' that is no larger in size than 'TYPE'.
1615 In those cases we perform a widen-shift that directly results in
1616 'result_type', to avoid a possible over-widening situation:
1618 type a_t;
1619 TYPE a_T, res_T;
1620 result_type res_result;
1622 S1 a_t = ;
1623 S2 a_T = (TYPE) a_t;
1624 S3 res_T = a_T << CONST;
1625 S4 res_result = (result_type) res_T;
1626 '--> res_result' = a_t w<< CONST;
1628 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1629 create an additional pattern stmt for S2 to create a variable of an
1630 intermediate type, and perform widen-shift on the intermediate type:
1632 type a_t;
1633 interm_type a_it;
1634 TYPE a_T, res_T, res_T';
1636 S1 a_t = ;
1637 S2 a_T = (TYPE) a_t;
1638 '--> a_it = (interm_type) a_t;
1639 S3 res_T = a_T << CONST;
1640 '--> res_T' = a_it <<* CONST;
1642 Input/Output:
1644 * STMTS: Contains a stmt from which the pattern search begins.
1645 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1646 in STMTS. When an intermediate type is used and a pattern statement is
1647 created for S2, we also put S2 here (before S3).
1649 Output:
1651 * TYPE_IN: The type of the input arguments to the pattern.
1653 * TYPE_OUT: The type of the output of this pattern.
1655 * Return value: A new stmt that will be used to replace the sequence of
1656 stmts that constitute the pattern. In this case it will be:
1657 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
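/* For reference, a minimal C source loop of this shape (a sketch only;
   names and types are illustrative):

     short a[N];
     int b[N];
     for (int i = 0; i < N; i++)
       b[i] = a[i] << 4;

   The promotion of a[i] (S2) and the shift (S3) together become
   WIDEN_LSHIFT_EXPR <a_t, 4>.  */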
1659 static gimple *
1660 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1661 tree *type_in, tree *type_out)
1663 gimple *last_stmt = stmts->pop ();
1664 gimple *def_stmt0;
1665 tree oprnd0, oprnd1;
1666 tree type, half_type0;
1667 gimple *pattern_stmt;
1668 tree vectype, vectype_out = NULL_TREE;
1669 tree var;
1670 enum tree_code dummy_code;
1671 int dummy_int;
1672 vec<tree> dummy_vec;
1673 gimple *use_stmt;
1674 bool promotion;
1676 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1677 return NULL;
1679 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1680 return NULL;
1682 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1683 return NULL;
1685 oprnd0 = gimple_assign_rhs1 (last_stmt);
1686 oprnd1 = gimple_assign_rhs2 (last_stmt);
1687 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1688 return NULL;
1690 /* Check operand 0: it has to be defined by a type promotion. */
1691 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1692 &promotion)
1693 || !promotion)
1694 return NULL;
1696 /* Check operand 1: has to be positive. We check that it fits the type
1697 in vect_handle_widen_op_by_const (). */
1698 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1699 return NULL;
1701 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1702 type = gimple_expr_type (last_stmt);
1704 /* Check for subsequent conversion to another type. */
1705 use_stmt = vect_single_imm_use (last_stmt);
1706 if (use_stmt && is_gimple_assign (use_stmt)
1707 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1708 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1710 tree use_lhs = gimple_assign_lhs (use_stmt);
1711 tree use_type = TREE_TYPE (use_lhs);
1713 if (INTEGRAL_TYPE_P (use_type)
1714 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1716 last_stmt = use_stmt;
1717 type = use_type;
1721   /* Check if this is a widening operation.  */
1722 gimple *wstmt = NULL;
1723 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1724 &oprnd0, &wstmt,
1725 type, &half_type0, def_stmt0))
1726 return NULL;
1728 /* Pattern detected. */
1729 vect_pattern_detected ("vect_recog_widen_shift_pattern", last_stmt);
1731 /* Check target support. */
1732 vectype = get_vectype_for_scalar_type (half_type0);
1733 vectype_out = get_vectype_for_scalar_type (type);
1735 if (!vectype
1736 || !vectype_out
1737 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1738 vectype_out, vectype,
1739 &dummy_code, &dummy_code,
1740 &dummy_int, &dummy_vec))
1741 return NULL;
1743 *type_in = vectype;
1744 *type_out = vectype_out;
1746 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1747 var = vect_recog_temp_ssa_var (type, NULL);
1748 pattern_stmt
1749 = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1750 if (wstmt)
1752 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1753 new_pattern_def_seq (stmt_vinfo, wstmt);
1754 stmt_vec_info new_stmt_info
1755 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1756 set_vinfo_for_stmt (wstmt, new_stmt_info);
1757 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1760 stmts->safe_push (last_stmt);
1761 return pattern_stmt;
1764 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1766 type a_t, b_t, c_t;
1768 S0 a_t = b_t r<< c_t;
1770 Input/Output:
1772 * STMTS: Contains a stmt from which the pattern search begins,
1773 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1774 with a sequence:
1776 S1 d_t = -c_t;
1777 S2 e_t = d_t & (B - 1);
1778 S3 f_t = b_t << c_t;
1779 S4 g_t = b_t >> e_t;
1780 S0 a_t = f_t | g_t;
1782    where B is the element bitsize of the type.
1784 Output:
1786 * TYPE_IN: The type of the input arguments to the pattern.
1788 * TYPE_OUT: The type of the output of this pattern.
1790 * Return value: A new stmt that will be used to replace the rotate
1791 S0 stmt. */
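/* For reference, a common source idiom that GIMPLE represents as the
   rotate S0 above (a sketch; assumes unsigned 32-bit x and n in 1..31):

     x = (x << n) | (x >> (32 - n));

   When the target lacks a vector rotate, this pattern re-expands S0 into
   the shift/or sequence S1-S4/S0 so that it can still be vectorized.  */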
1793 static gimple *
1794 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1796 gimple *last_stmt = stmts->pop ();
1797 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1798 gimple *pattern_stmt, *def_stmt;
1799 enum tree_code rhs_code;
1800 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1801 vec_info *vinfo = stmt_vinfo->vinfo;
1802 enum vect_def_type dt;
1803 optab optab1, optab2;
1804 edge ext_def = NULL;
1806 if (!is_gimple_assign (last_stmt))
1807 return NULL;
1809 rhs_code = gimple_assign_rhs_code (last_stmt);
1810 switch (rhs_code)
1812 case LROTATE_EXPR:
1813 case RROTATE_EXPR:
1814 break;
1815 default:
1816 return NULL;
1819 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1820 return NULL;
1822 lhs = gimple_assign_lhs (last_stmt);
1823 oprnd0 = gimple_assign_rhs1 (last_stmt);
1824 type = TREE_TYPE (oprnd0);
1825 oprnd1 = gimple_assign_rhs2 (last_stmt);
1826 if (TREE_CODE (oprnd0) != SSA_NAME
1827 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1828 || !INTEGRAL_TYPE_P (type)
1829 || !TYPE_UNSIGNED (type))
1830 return NULL;
1832 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1833 return NULL;
1835 if (dt != vect_internal_def
1836 && dt != vect_constant_def
1837 && dt != vect_external_def)
1838 return NULL;
1840 vectype = get_vectype_for_scalar_type (type);
1841 if (vectype == NULL_TREE)
1842 return NULL;
1844 /* If vector/vector or vector/scalar rotate is supported by the target,
1845 don't do anything here. */
1846 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1847 if (optab1
1848 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1849 return NULL;
1851 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1853 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1854 if (optab2
1855 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1856 return NULL;
1859 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1860 don't do anything here either. */
1861 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1862 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1863 if (!optab1
1864 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1865 || !optab2
1866 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1868 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1869 return NULL;
1870 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1871 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1872 if (!optab1
1873 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1874 || !optab2
1875 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1876 return NULL;
1879 *type_in = vectype;
1880 *type_out = vectype;
1881 if (*type_in == NULL_TREE)
1882 return NULL;
1884 if (dt == vect_external_def
1885 && TREE_CODE (oprnd1) == SSA_NAME
1886 && is_a <loop_vec_info> (vinfo))
1888 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1889 ext_def = loop_preheader_edge (loop);
1890 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1892 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1893 if (bb == NULL
1894 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1895 ext_def = NULL;
1899 def = NULL_TREE;
1900 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1901 if (TREE_CODE (oprnd1) == INTEGER_CST
1902 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1903 def = oprnd1;
1904 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1906 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1907 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1908 && TYPE_PRECISION (TREE_TYPE (rhs1))
1909 == TYPE_PRECISION (type))
1910 def = rhs1;
1913 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1914 if (def == NULL_TREE)
1916 def = vect_recog_temp_ssa_var (type, NULL);
1917 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1918 if (ext_def)
1920 basic_block new_bb
1921 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1922 gcc_assert (!new_bb);
1924 else
1925 append_pattern_def_seq (stmt_vinfo, def_stmt);
1927 stype = TREE_TYPE (def);
1928 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1930 if (TREE_CODE (def) == INTEGER_CST)
1932 if (!tree_fits_uhwi_p (def)
1933 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1934 || integer_zerop (def))
1935 return NULL;
1936 def2 = build_int_cst (stype,
1937 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1939 else
1941 tree vecstype = get_vectype_for_scalar_type (stype);
1942 stmt_vec_info def_stmt_vinfo;
1944 if (vecstype == NULL_TREE)
1945 return NULL;
1946 def2 = vect_recog_temp_ssa_var (stype, NULL);
1947 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1948 if (ext_def)
1950 basic_block new_bb
1951 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1952 gcc_assert (!new_bb);
1954 else
1956 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1957 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1958 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1959 append_pattern_def_seq (stmt_vinfo, def_stmt);
1962 def2 = vect_recog_temp_ssa_var (stype, NULL);
1963 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
1964 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1965 gimple_assign_lhs (def_stmt), mask);
1966 if (ext_def)
1968 basic_block new_bb
1969 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1970 gcc_assert (!new_bb);
1972 else
1974 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1975 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1976 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1977 append_pattern_def_seq (stmt_vinfo, def_stmt);
1981 var1 = vect_recog_temp_ssa_var (type, NULL);
1982 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1983 ? LSHIFT_EXPR : RSHIFT_EXPR,
1984 oprnd0, def);
1985 append_pattern_def_seq (stmt_vinfo, def_stmt);
1987 var2 = vect_recog_temp_ssa_var (type, NULL);
1988 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1989 ? RSHIFT_EXPR : LSHIFT_EXPR,
1990 oprnd0, def2);
1991 append_pattern_def_seq (stmt_vinfo, def_stmt);
1993 /* Pattern detected. */
1994 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
1996 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1997 var = vect_recog_temp_ssa_var (type, NULL);
1998 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2000 stmts->safe_push (last_stmt);
2001 return pattern_stmt;
2004 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2005 vectorized:
2007 type a_t;
2008 TYPE b_T, res_T;
2010 S1 a_t = ;
2011 S2 b_T = ;
2012 S3 res_T = b_T op a_t;
2014 where type 'TYPE' is a type with a different size from 'type',
2015 and op is <<, >> or rotate.
2017 Also detect cases:
2019 type a_t;
2020 TYPE b_T, c_T, res_T;
2022 S0 c_T = ;
2023 S1 a_t = (type) c_T;
2024 S2 b_T = ;
2025 S3 res_T = b_T op a_t;
2027 Input/Output:
2029 * STMTS: Contains a stmt from which the pattern search begins,
2030 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2031 with a shift/rotate which has the same type on both operands, in the
2032 second case just b_T op c_T, in the first case with added cast
2033 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2035 Output:
2037 * TYPE_IN: The type of the input arguments to the pattern.
2039 * TYPE_OUT: The type of the output of this pattern.
2041 * Return value: A new stmt that will be used to replace the shift/rotate
2042 S3 stmt. */
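/* Illustration: assuming 'TYPE' is 32-bit int and 'type' is 8-bit char,
   the first form

     S1  a_t = ...;
     S2  b_T = ...;
     S3  res_T = b_T << a_t;

   is rewritten so that both operands of the shift have type 'TYPE': a cast
   c_T = (TYPE) a_t is added to STMT_VINFO_PATTERN_DEF_SEQ and S3 becomes
   res_T = b_T << c_T.  Only the type of the shift count changes, not its
   value, so a vector/vector shift can be used.  */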
2044 static gimple *
2045 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2046 tree *type_in, tree *type_out)
2048 gimple *last_stmt = stmts->pop ();
2049 tree oprnd0, oprnd1, lhs, var;
2050 gimple *pattern_stmt;
2051 enum tree_code rhs_code;
2052 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2053 vec_info *vinfo = stmt_vinfo->vinfo;
2055 if (!is_gimple_assign (last_stmt))
2056 return NULL;
2058 rhs_code = gimple_assign_rhs_code (last_stmt);
2059 switch (rhs_code)
2061 case LSHIFT_EXPR:
2062 case RSHIFT_EXPR:
2063 case LROTATE_EXPR:
2064 case RROTATE_EXPR:
2065 break;
2066 default:
2067 return NULL;
2070 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2071 return NULL;
2073 lhs = gimple_assign_lhs (last_stmt);
2074 oprnd0 = gimple_assign_rhs1 (last_stmt);
2075 oprnd1 = gimple_assign_rhs2 (last_stmt);
2076 if (TREE_CODE (oprnd0) != SSA_NAME
2077 || TREE_CODE (oprnd1) != SSA_NAME
2078 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2079 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2080 || TYPE_PRECISION (TREE_TYPE (lhs))
2081 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2082 return NULL;
2084 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2085 if (!def_vinfo)
2086 return NULL;
2088 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2089 *type_out = *type_in;
2090 if (*type_in == NULL_TREE)
2091 return NULL;
2093 tree def = NULL_TREE;
2094 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2095 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo)
2096 && def_stmt
2097 && gimple_assign_cast_p (def_stmt))
2099 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2100 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2101 && TYPE_PRECISION (TREE_TYPE (rhs1))
2102 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2104 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2105 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2106 def = rhs1;
2107 else
2109 tree mask
2110 = build_low_bits_mask (TREE_TYPE (rhs1),
2111 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2112 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2113 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2114 stmt_vec_info new_stmt_info
2115 = new_stmt_vec_info (def_stmt, vinfo);
2116 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2117 STMT_VINFO_VECTYPE (new_stmt_info)
2118 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2119 new_pattern_def_seq (stmt_vinfo, def_stmt);
2124 if (def == NULL_TREE)
2126 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2127 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2128 new_pattern_def_seq (stmt_vinfo, def_stmt);
2131 /* Pattern detected. */
2132 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2134 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2135 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2136 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2138 stmts->safe_push (last_stmt);
2139 return pattern_stmt;
2142 /* Return true iff the target has a vector optab implementing the operation
2143 CODE on type VECTYPE. */
2145 static bool
2146 target_has_vecop_for_code (tree_code code, tree vectype)
2148 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2149 return voptab
2150 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2153 /* Verify that the target has optabs of VECTYPE to perform all the steps
2154 needed by the multiplication-by-immediate synthesis algorithm described by
2155 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2156 present. Return true iff the target supports all the steps. */
2158 static bool
2159 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2160 tree vectype, bool synth_shift_p)
2162 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2163 return false;
2165 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2166 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2168 if (var == negate_variant
2169 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2170 return false;
2172 /* If we must synthesize shifts with additions, make sure that vector
2173 addition is available. */
2174 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2175 return false;
2177 for (int i = 1; i < alg->ops; i++)
2179 switch (alg->op[i])
2181 case alg_shift:
2182 break;
2183 case alg_add_t_m2:
2184 case alg_add_t2_m:
2185 case alg_add_factor:
2186 if (!supports_vplus)
2187 return false;
2188 break;
2189 case alg_sub_t_m2:
2190 case alg_sub_t2_m:
2191 case alg_sub_factor:
2192 if (!supports_vminus)
2193 return false;
2194 break;
2195 case alg_unknown:
2196 case alg_m:
2197 case alg_zero:
2198 case alg_impossible:
2199 return false;
2200 default:
2201 gcc_unreachable ();
2205 return true;
2208 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2209 putting the final result in DEST. Append all statements but the last into
2210 VINFO. Return the last statement. */
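/* Illustration: for AMNT == 3 the loop below produces (with hypothetical
   temporaries t1, t2)

     t1 = OP + OP;        OP * 2
     t2 = t1 + t1;        OP * 4
     DEST = t2 + t2;      OP * 8 == OP << 3

   i.e. AMNT successive self-additions; all but the last are appended to
   VINFO's pattern def sequence and the last one is returned.  */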
2212 static gimple *
2213 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2214 stmt_vec_info vinfo)
2216 HOST_WIDE_INT i;
2217 tree itype = TREE_TYPE (op);
2218 tree prev_res = op;
2219 gcc_assert (amnt >= 0);
2220 for (i = 0; i < amnt; i++)
2222 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2223 : dest;
2224 gimple *stmt
2225 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2226 prev_res = tmp_var;
2227 if (i < amnt - 1)
2228 append_pattern_def_seq (vinfo, stmt);
2229 else
2230 return stmt;
2232 gcc_unreachable ();
2233 return NULL;
2236 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2237 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2238 the process if necessary. Append the resulting assignment statements
2239 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2240 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2241 left shifts using additions. */
2243 static tree
2244 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2245 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2247 if (integer_zerop (op2)
2248 && (code == LSHIFT_EXPR
2249 || code == PLUS_EXPR))
2251 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2252 return op1;
2255 gimple *stmt;
2256 tree itype = TREE_TYPE (op1);
2257 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2259 if (code == LSHIFT_EXPR
2260 && synth_shift_p)
2262 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2263 stmt_vinfo);
2264 append_pattern_def_seq (stmt_vinfo, stmt);
2265 return tmp_var;
2268 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2269 append_pattern_def_seq (stmt_vinfo, stmt);
2270 return tmp_var;
2273 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2274 and simple arithmetic operations to be vectorized. Record the statements
2275 produced in STMT_VINFO and return the last statement in the sequence or
2276 NULL if it's not possible to synthesize such a multiplication.
2277 This function mirrors the behavior of expand_mult_const in expmed.c but
2278 works on tree-ssa form. */
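/* Illustration: one possible synthesis for VAL == 10 (the exact steps are
   whatever choose_mult_variant selects) is, with hypothetical temporaries,

     t1 = OP << 2;          OP * 4
     t2 = t1 + OP;          OP * 5
     result = t2 << 1;      OP * 10

   When the target lacks vector shifts (synth_shift_p), each shift is itself
   expanded into additions by synth_lshift_by_additions.  */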
2280 static gimple *
2281 vect_synth_mult_by_constant (tree op, tree val,
2282 stmt_vec_info stmt_vinfo)
2284 tree itype = TREE_TYPE (op);
2285 machine_mode mode = TYPE_MODE (itype);
2286 struct algorithm alg;
2287 mult_variant variant;
2288 if (!tree_fits_shwi_p (val))
2289 return NULL;
2291 /* Multiplication synthesis by shifts, adds and subs can introduce
2292 signed overflow where the original operation didn't. Perform the
2293 operations on an unsigned type and cast back to avoid this.
2294 In the future we may want to relax this for synthesis algorithms
2295 that we can prove do not cause unexpected overflow. */
2296 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2298 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2300 /* Targets that don't support vector shifts but support vector additions
2301 can synthesize shifts that way. */
2302 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2304 HOST_WIDE_INT hwval = tree_to_shwi (val);
2305 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2306 The vectorizer's benefit analysis will decide whether it's beneficial
2307 to do this. */
2308 bool possible = choose_mult_variant (mode, hwval, &alg,
2309 &variant, MAX_COST);
2310 if (!possible)
2311 return NULL;
2313 tree vectype = get_vectype_for_scalar_type (multtype);
2315 if (!vectype
2316 || !target_supports_mult_synth_alg (&alg, variant,
2317 vectype, synth_shift_p))
2318 return NULL;
2320 tree accumulator;
2322 /* Clear out the sequence of statements so we can populate it below. */
2323 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2324 gimple *stmt = NULL;
2326 if (cast_to_unsigned_p)
2328 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2329 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2330 append_pattern_def_seq (stmt_vinfo, stmt);
2331 op = tmp_op;
2334 if (alg.op[0] == alg_zero)
2335 accumulator = build_int_cst (multtype, 0);
2336 else
2337 accumulator = op;
2339 bool needs_fixup = (variant == negate_variant)
2340 || (variant == add_variant);
2342 for (int i = 1; i < alg.ops; i++)
2344 tree shft_log = build_int_cst (multtype, alg.log[i]);
2345 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2346 tree tmp_var = NULL_TREE;
2348 switch (alg.op[i])
2350 case alg_shift:
2351 if (synth_shift_p)
2352 stmt
2353 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2354 stmt_vinfo);
2355 else
2356 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2357 shft_log);
2358 break;
2359 case alg_add_t_m2:
2360 tmp_var
2361 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2362 stmt_vinfo, synth_shift_p);
2363 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2364 tmp_var);
2365 break;
2366 case alg_sub_t_m2:
2367 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2368 shft_log, stmt_vinfo,
2369 synth_shift_p);
2370 /* In some algorithms the first step involves zeroing the
2371 accumulator. If subtracting from such an accumulator
2372 just emit the negation directly. */
2373 if (integer_zerop (accumulator))
2374 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2375 else
2376 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2377 tmp_var);
2378 break;
2379 case alg_add_t2_m:
2380 tmp_var
2381 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2382 stmt_vinfo, synth_shift_p);
2383 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2384 break;
2385 case alg_sub_t2_m:
2386 tmp_var
2387 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2388 stmt_vinfo, synth_shift_p);
2389 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2390 break;
2391 case alg_add_factor:
2392 tmp_var
2393 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2394 stmt_vinfo, synth_shift_p);
2395 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2396 tmp_var);
2397 break;
2398 case alg_sub_factor:
2399 tmp_var
2400 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2401 stmt_vinfo, synth_shift_p);
2402 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2403 accumulator);
2404 break;
2405 default:
2406 gcc_unreachable ();
2408 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2409 but rather return it directly. */
2411 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2412 append_pattern_def_seq (stmt_vinfo, stmt);
2413 accumulator = accum_tmp;
2415 if (variant == negate_variant)
2417 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2418 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2419 accumulator = accum_tmp;
2420 if (cast_to_unsigned_p)
2421 append_pattern_def_seq (stmt_vinfo, stmt);
2423 else if (variant == add_variant)
2425 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2426 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2427 accumulator = accum_tmp;
2428 if (cast_to_unsigned_p)
2429 append_pattern_def_seq (stmt_vinfo, stmt);
2431 /* Move back to a signed type if needed. */
2432 if (cast_to_unsigned_p)
2434 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2435 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2438 return stmt;
2441 /* Detect multiplication by constant and convert it into a sequence of
2442 shifts and additions, subtractions, negations. We reuse the
2443 choose_mult_variant algorithms from expmed.c.
2445 Input/Output:
2447 STMTS: Contains a stmt from which the pattern search begins,
2448 i.e. the mult stmt.
2450 Output:
2452 * TYPE_IN: The type of the input arguments to the pattern.
2454 * TYPE_OUT: The type of the output of this pattern.
2456 * Return value: A new stmt that will be used to replace
2457 the multiplication. */
2459 static gimple *
2460 vect_recog_mult_pattern (vec<gimple *> *stmts,
2461 tree *type_in, tree *type_out)
2463 gimple *last_stmt = stmts->pop ();
2464 tree oprnd0, oprnd1, vectype, itype;
2465 gimple *pattern_stmt;
2466 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2468 if (!is_gimple_assign (last_stmt))
2469 return NULL;
2471 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2472 return NULL;
2474 oprnd0 = gimple_assign_rhs1 (last_stmt);
2475 oprnd1 = gimple_assign_rhs2 (last_stmt);
2476 itype = TREE_TYPE (oprnd0);
2478 if (TREE_CODE (oprnd0) != SSA_NAME
2479 || TREE_CODE (oprnd1) != INTEGER_CST
2480 || !INTEGRAL_TYPE_P (itype)
2481 || !type_has_mode_precision_p (itype))
2482 return NULL;
2484 vectype = get_vectype_for_scalar_type (itype);
2485 if (vectype == NULL_TREE)
2486 return NULL;
2488 /* If the target can handle vectorized multiplication natively,
2489 don't attempt to optimize this. */
2490 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2491 if (mul_optab != unknown_optab)
2493 machine_mode vec_mode = TYPE_MODE (vectype);
2494 int icode = (int) optab_handler (mul_optab, vec_mode);
2495 if (icode != CODE_FOR_nothing)
2496 return NULL;
2499 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2500 if (!pattern_stmt)
2501 return NULL;
2503 /* Pattern detected. */
2504 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2506 stmts->safe_push (last_stmt);
2507 *type_in = vectype;
2508 *type_out = vectype;
2510 return pattern_stmt;
2513 /* Detect a signed division by a constant that wouldn't be
2514 otherwise vectorized:
2516 type a_t, b_t;
2518 S1 a_t = b_t / N;
2520 where type 'type' is an integral type and N is a constant.
2522 Similarly handle modulo by a constant:
2524 S4 a_t = b_t % N;
2526 Input/Output:
2528 * STMTS: Contains a stmt from which the pattern search begins,
2529 i.e. the division stmt. S1 is replaced, if N is a power
2530 of two constant and type is signed, by:
2531 S3 y_t = b_t < 0 ? N - 1 : 0;
2532 S2 x_t = b_t + y_t;
2533 S1' a_t = x_t >> log2 (N);
2535 S4 is replaced, if N is a power of two constant and
2536 type is signed, by (where *_T temporaries have unsigned type):
2537 S9 y_T = b_t < 0 ? -1U : 0U;
2538 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2539 S7 z_t = (type) z_T;
2540 S6 w_t = b_t + z_t;
2541 S5 x_t = w_t & (N - 1);
2542 S4' a_t = x_t - z_t;
2544 Output:
2546 * TYPE_IN: The type of the input arguments to the pattern.
2548 * TYPE_OUT: The type of the output of this pattern.
2550 * Return value: A new stmt that will be used to replace the division
2551 S1 or modulo S4 stmt. */
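/* Worked example (illustration only): for signed 32-bit b_t == -13 and
   N == 8, the division expansion gives

     S3   y_t = -13 < 0 ? 7 : 0;          ==  7
     S2   x_t = -13 + 7;                  == -6
     S1'  a_t = -6 >> 3;                  == -1 == trunc (-13 / 8)

   and the modulo expansion gives

     S9   y_T = -1U;
     S8   z_T = y_T >> (32 - 3);          ==  7
     S7   z_t = (type) 7;
     S6   w_t = -13 + 7;                  == -6
     S5   x_t = -6 & 7;                   ==  2
     S4'  a_t = 2 - 7;                    == -5 == -13 % 8  */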
2553 static gimple *
2554 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2555 tree *type_in, tree *type_out)
2557 gimple *last_stmt = stmts->pop ();
2558 tree oprnd0, oprnd1, vectype, itype, cond;
2559 gimple *pattern_stmt, *def_stmt;
2560 enum tree_code rhs_code;
2561 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2562 vec_info *vinfo = stmt_vinfo->vinfo;
2563 optab optab;
2564 tree q;
2565 int dummy_int, prec;
2566 stmt_vec_info def_stmt_vinfo;
2568 if (!is_gimple_assign (last_stmt))
2569 return NULL;
2571 rhs_code = gimple_assign_rhs_code (last_stmt);
2572 switch (rhs_code)
2574 case TRUNC_DIV_EXPR:
2575 case TRUNC_MOD_EXPR:
2576 break;
2577 default:
2578 return NULL;
2581 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2582 return NULL;
2584 oprnd0 = gimple_assign_rhs1 (last_stmt);
2585 oprnd1 = gimple_assign_rhs2 (last_stmt);
2586 itype = TREE_TYPE (oprnd0);
2587 if (TREE_CODE (oprnd0) != SSA_NAME
2588 || TREE_CODE (oprnd1) != INTEGER_CST
2589 || TREE_CODE (itype) != INTEGER_TYPE
2590 || !type_has_mode_precision_p (itype))
2591 return NULL;
2593 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2594 vectype = get_vectype_for_scalar_type (itype);
2595 if (vectype == NULL_TREE)
2596 return NULL;
2598 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2600 /* If the target can handle vectorized division or modulo natively,
2601 don't attempt to optimize this, since native division is likely
2602 to give smaller code. */
2603 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2604 if (optab != unknown_optab)
2606 machine_mode vec_mode = TYPE_MODE (vectype);
2607 int icode = (int) optab_handler (optab, vec_mode);
2608 if (icode != CODE_FOR_nothing)
2609 return NULL;
2613 prec = TYPE_PRECISION (itype);
2614 if (integer_pow2p (oprnd1))
2616 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2617 return NULL;
2619 /* Pattern detected. */
2620 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2622 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2623 build_int_cst (itype, 0));
2624 if (rhs_code == TRUNC_DIV_EXPR)
2626 tree var = vect_recog_temp_ssa_var (itype, NULL);
2627 tree shift;
2628 def_stmt
2629 = gimple_build_assign (var, COND_EXPR, cond,
2630 fold_build2 (MINUS_EXPR, itype, oprnd1,
2631 build_int_cst (itype, 1)),
2632 build_int_cst (itype, 0));
2633 new_pattern_def_seq (stmt_vinfo, def_stmt);
2634 var = vect_recog_temp_ssa_var (itype, NULL);
2635 def_stmt
2636 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2637 gimple_assign_lhs (def_stmt));
2638 append_pattern_def_seq (stmt_vinfo, def_stmt);
2640 shift = build_int_cst (itype, tree_log2 (oprnd1));
2641 pattern_stmt
2642 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2643 RSHIFT_EXPR, var, shift);
2645 else
2647 tree signmask;
2648 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2649 if (compare_tree_int (oprnd1, 2) == 0)
2651 signmask = vect_recog_temp_ssa_var (itype, NULL);
2652 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2653 build_int_cst (itype, 1),
2654 build_int_cst (itype, 0));
2655 append_pattern_def_seq (stmt_vinfo, def_stmt);
2657 else
2659 tree utype
2660 = build_nonstandard_integer_type (prec, 1);
2661 tree vecutype = get_vectype_for_scalar_type (utype);
2662 tree shift
2663 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2664 - tree_log2 (oprnd1));
2665 tree var = vect_recog_temp_ssa_var (utype, NULL);
2667 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2668 build_int_cst (utype, -1),
2669 build_int_cst (utype, 0));
2670 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2671 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2672 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2673 append_pattern_def_seq (stmt_vinfo, def_stmt);
2674 var = vect_recog_temp_ssa_var (utype, NULL);
2675 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2676 gimple_assign_lhs (def_stmt),
2677 shift);
2678 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2679 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2680 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2681 append_pattern_def_seq (stmt_vinfo, def_stmt);
2682 signmask = vect_recog_temp_ssa_var (itype, NULL);
2683 def_stmt
2684 = gimple_build_assign (signmask, NOP_EXPR, var);
2685 append_pattern_def_seq (stmt_vinfo, def_stmt);
2687 def_stmt
2688 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2689 PLUS_EXPR, oprnd0, signmask);
2690 append_pattern_def_seq (stmt_vinfo, def_stmt);
2691 def_stmt
2692 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2693 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2694 fold_build2 (MINUS_EXPR, itype, oprnd1,
2695 build_int_cst (itype, 1)));
2696 append_pattern_def_seq (stmt_vinfo, def_stmt);
2698 pattern_stmt
2699 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2700 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2701 signmask);
2704 stmts->safe_push (last_stmt);
2706 *type_in = vectype;
2707 *type_out = vectype;
2708 return pattern_stmt;
2711 if (prec > HOST_BITS_PER_WIDE_INT
2712 || integer_zerop (oprnd1))
2713 return NULL;
2715 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2716 return NULL;
2718 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2720 if (TYPE_UNSIGNED (itype))
2722 unsigned HOST_WIDE_INT mh, ml;
2723 int pre_shift, post_shift;
2724 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2725 & GET_MODE_MASK (itype_mode));
2726 tree t1, t2, t3, t4;
2728 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2729 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2730 return NULL;
2732 /* Find a suitable multiplier and right shift count
2733 instead of multiplying with D. */
2734 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2736 /* If the suggested multiplier is more than SIZE bits, we can do better
2737 for even divisors, using an initial right shift. */
2738 if (mh != 0 && (d & 1) == 0)
2740 pre_shift = ctz_or_zero (d);
2741 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2742 &ml, &post_shift, &dummy_int);
2743 gcc_assert (!mh);
2745 else
2746 pre_shift = 0;
2748 if (mh != 0)
2750 if (post_shift - 1 >= prec)
2751 return NULL;
2753 /* t1 = oprnd0 h* ml;
2754 t2 = oprnd0 - t1;
2755 t3 = t2 >> 1;
2756 t4 = t1 + t3;
2757 q = t4 >> (post_shift - 1); */
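/* Illustration: for a 32-bit unsigned division by 7 the required multiplier
   needs 33 bits, so choose_multiplier returns mh != 0 with ml == 0x24924925
   and post_shift == 3, giving

     t1 = oprnd0 h* 0x24924925;
     t2 = oprnd0 - t1;
     t3 = t2 >> 1;
     t4 = t1 + t3;
     q  = t4 >> 2;

   e.g. oprnd0 == 20 gives t1 == 2, t2 == 18, t3 == 9, t4 == 11, q == 2.  */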
2758 t1 = vect_recog_temp_ssa_var (itype, NULL);
2759 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2760 build_int_cst (itype, ml));
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2763 t2 = vect_recog_temp_ssa_var (itype, NULL);
2764 def_stmt
2765 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2766 append_pattern_def_seq (stmt_vinfo, def_stmt);
2768 t3 = vect_recog_temp_ssa_var (itype, NULL);
2769 def_stmt
2770 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2771 append_pattern_def_seq (stmt_vinfo, def_stmt);
2773 t4 = vect_recog_temp_ssa_var (itype, NULL);
2774 def_stmt
2775 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2777 if (post_shift != 1)
2779 append_pattern_def_seq (stmt_vinfo, def_stmt);
2781 q = vect_recog_temp_ssa_var (itype, NULL);
2782 pattern_stmt
2783 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2784 build_int_cst (itype, post_shift - 1));
2786 else
2788 q = t4;
2789 pattern_stmt = def_stmt;
2792 else
2794 if (pre_shift >= prec || post_shift >= prec)
2795 return NULL;
2797 /* t1 = oprnd0 >> pre_shift;
2798 t2 = t1 h* ml;
2799 q = t2 >> post_shift; */
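/* Illustration: for a 32-bit unsigned division by 3, choose_multiplier
   gives ml == 0xaaaaaaab with pre_shift == 0 and post_shift == 1, so the
   sequence is simply

     t2 = oprnd0 h* 0xaaaaaaab;
     q  = t2 >> 1;

   e.g. oprnd0 == 10 gives t2 == 6 and q == 3.  */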
2800 if (pre_shift)
2802 t1 = vect_recog_temp_ssa_var (itype, NULL);
2803 def_stmt
2804 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2805 build_int_cst (NULL, pre_shift));
2806 append_pattern_def_seq (stmt_vinfo, def_stmt);
2808 else
2809 t1 = oprnd0;
2811 t2 = vect_recog_temp_ssa_var (itype, NULL);
2812 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2813 build_int_cst (itype, ml));
2815 if (post_shift)
2817 append_pattern_def_seq (stmt_vinfo, def_stmt);
2819 q = vect_recog_temp_ssa_var (itype, NULL);
2820 def_stmt
2821 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2822 build_int_cst (itype, post_shift));
2824 else
2825 q = t2;
2827 pattern_stmt = def_stmt;
2830 else
2832 unsigned HOST_WIDE_INT ml;
2833 int post_shift;
2834 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2835 unsigned HOST_WIDE_INT abs_d;
2836 bool add = false;
2837 tree t1, t2, t3, t4;
2839 /* Give up for -1. */
2840 if (d == -1)
2841 return NULL;
2843 /* Since d might be INT_MIN, we have to cast to
2844 unsigned HOST_WIDE_INT before negating to avoid
2845 undefined signed overflow. */
2846 abs_d = (d >= 0
2847 ? (unsigned HOST_WIDE_INT) d
2848 : - (unsigned HOST_WIDE_INT) d);
2850 /* n rem d = n rem -d */
2851 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2853 d = abs_d;
2854 oprnd1 = build_int_cst (itype, abs_d);
2856 else if (HOST_BITS_PER_WIDE_INT >= prec
2857 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2858 /* This case is not handled correctly below. */
2859 return NULL;
2861 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2862 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2864 add = true;
2865 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2867 if (post_shift >= prec)
2868 return NULL;
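/* Illustration: for a signed 32-bit division by 3, choose_multiplier gives
   ml == 0x55555556 with post_shift == 0 and no add step, so the sequence
   below becomes

     t1 = oprnd0 h* 0x55555556;
     t4 = oprnd0 >> 31;
     q  = t1 - t4;

   e.g. oprnd0 == -7 gives t1 == -3, t4 == -1, q == -2 == trunc (-7 / 3).  */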
2870 /* t1 = oprnd0 h* ml; */
2871 t1 = vect_recog_temp_ssa_var (itype, NULL);
2872 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2873 build_int_cst (itype, ml));
2875 if (add)
2877 /* t2 = t1 + oprnd0; */
2878 append_pattern_def_seq (stmt_vinfo, def_stmt);
2879 t2 = vect_recog_temp_ssa_var (itype, NULL);
2880 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2882 else
2883 t2 = t1;
2885 if (post_shift)
2887 /* t3 = t2 >> post_shift; */
2888 append_pattern_def_seq (stmt_vinfo, def_stmt);
2889 t3 = vect_recog_temp_ssa_var (itype, NULL);
2890 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2891 build_int_cst (itype, post_shift));
2893 else
2894 t3 = t2;
2896 wide_int oprnd0_min, oprnd0_max;
2897 int msb = 1;
2898 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2900 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2901 msb = 0;
2902 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2903 msb = -1;
2906 if (msb == 0 && d >= 0)
2908 /* q = t3; */
2909 q = t3;
2910 pattern_stmt = def_stmt;
2912 else
2914 /* t4 = oprnd0 >> (prec - 1);
2915 or if we know from VRP that oprnd0 >= 0
2916 t4 = 0;
2917 or if we know from VRP that oprnd0 < 0
2918 t4 = -1; */
2919 append_pattern_def_seq (stmt_vinfo, def_stmt);
2920 t4 = vect_recog_temp_ssa_var (itype, NULL);
2921 if (msb != 1)
2922 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2923 build_int_cst (itype, msb));
2924 else
2925 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2926 build_int_cst (itype, prec - 1));
2927 append_pattern_def_seq (stmt_vinfo, def_stmt);
2929 /* q = t3 - t4; or q = t4 - t3; */
2930 q = vect_recog_temp_ssa_var (itype, NULL);
2931 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2932 d < 0 ? t3 : t4);
2936 if (rhs_code == TRUNC_MOD_EXPR)
2938 tree r, t1;
2940 /* We divided. Now finish by:
2941 t1 = q * oprnd1;
2942 r = oprnd0 - t1; */
2943 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2945 t1 = vect_recog_temp_ssa_var (itype, NULL);
2946 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2947 append_pattern_def_seq (stmt_vinfo, def_stmt);
2949 r = vect_recog_temp_ssa_var (itype, NULL);
2950 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2953 /* Pattern detected. */
2954 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2956 stmts->safe_push (last_stmt);
2958 *type_in = vectype;
2959 *type_out = vectype;
2960 return pattern_stmt;
2963 /* Function vect_recog_mixed_size_cond_pattern
2965 Try to find the following pattern:
2967 type x_t, y_t;
2968 TYPE a_T, b_T, c_T;
2969 loop:
2970 S1 a_T = x_t CMP y_t ? b_T : c_T;
2972 where type 'TYPE' is an integral type which has different size
2973 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2974 than 'type', the constants need to fit into an integer type
2975 with the same width as 'type') or results of conversion from 'type'.
2977 Input:
2979 * LAST_STMT: A stmt from which the pattern search begins.
2981 Output:
2983 * TYPE_IN: The type of the input arguments to the pattern.
2985 * TYPE_OUT: The type of the output of this pattern.
2987 * Return value: A new stmt that will be used to replace the pattern.
2988 Additionally a def_stmt is added.
2990 a_it = x_t CMP y_t ? b_it : c_it;
2991 a_T = (TYPE) a_it; */
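/* Illustration: with 'type' a 16-bit short (for x_t, y_t) and 'TYPE' a
   32-bit int, S1

     a_T = x_t < y_t ? 5 : 0;

   becomes a 16-bit conditional followed by a widening conversion:

     a_it = x_t < y_t ? 5 : 0;
     a_T  = (TYPE) a_it;

   so the comparison and the selection operate on vectors with the same
   number of elements.  */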
2993 static gimple *
2994 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2995 tree *type_out)
2997 gimple *last_stmt = (*stmts)[0];
2998 tree cond_expr, then_clause, else_clause;
2999 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3000 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3001 gimple *pattern_stmt, *def_stmt;
3002 vec_info *vinfo = stmt_vinfo->vinfo;
3003 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3004 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3005 bool promotion;
3006 tree comp_scalar_type;
3008 if (!is_gimple_assign (last_stmt)
3009 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3010 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3011 return NULL;
3013 cond_expr = gimple_assign_rhs1 (last_stmt);
3014 then_clause = gimple_assign_rhs2 (last_stmt);
3015 else_clause = gimple_assign_rhs3 (last_stmt);
3017 if (!COMPARISON_CLASS_P (cond_expr))
3018 return NULL;
3020 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3021 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3022 if (comp_vectype == NULL_TREE)
3023 return NULL;
3025 type = gimple_expr_type (last_stmt);
3026 if (types_compatible_p (type, comp_scalar_type)
3027 || ((TREE_CODE (then_clause) != INTEGER_CST
3028 || TREE_CODE (else_clause) != INTEGER_CST)
3029 && !INTEGRAL_TYPE_P (comp_scalar_type))
3030 || !INTEGRAL_TYPE_P (type))
3031 return NULL;
3033 if ((TREE_CODE (then_clause) != INTEGER_CST
3034 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3035 &def_stmt0, &promotion))
3036 || (TREE_CODE (else_clause) != INTEGER_CST
3037 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3038 &def_stmt1, &promotion)))
3039 return NULL;
3041 if (orig_type0 && orig_type1
3042 && !types_compatible_p (orig_type0, orig_type1))
3043 return NULL;
3045 if (orig_type0)
3047 if (!types_compatible_p (orig_type0, comp_scalar_type))
3048 return NULL;
3049 then_clause = gimple_assign_rhs1 (def_stmt0);
3050 itype = orig_type0;
3053 if (orig_type1)
3055 if (!types_compatible_p (orig_type1, comp_scalar_type))
3056 return NULL;
3057 else_clause = gimple_assign_rhs1 (def_stmt1);
3058 itype = orig_type1;
3062 HOST_WIDE_INT cmp_mode_size
3063 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3065 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3066 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3067 return NULL;
3069 vectype = get_vectype_for_scalar_type (type);
3070 if (vectype == NULL_TREE)
3071 return NULL;
3073 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3074 return NULL;
3076 if (itype == NULL_TREE)
3077 itype = build_nonstandard_integer_type (cmp_mode_size,
3078 TYPE_UNSIGNED (type));
3080 if (itype == NULL_TREE
3081 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3082 return NULL;
3084 vecitype = get_vectype_for_scalar_type (itype);
3085 if (vecitype == NULL_TREE)
3086 return NULL;
3088 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3089 return NULL;
3091 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3093 if ((TREE_CODE (then_clause) == INTEGER_CST
3094 && !int_fits_type_p (then_clause, itype))
3095 || (TREE_CODE (else_clause) == INTEGER_CST
3096 && !int_fits_type_p (else_clause, itype)))
3097 return NULL;
3100 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3101 COND_EXPR, unshare_expr (cond_expr),
3102 fold_convert (itype, then_clause),
3103 fold_convert (itype, else_clause));
3104 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3105 NOP_EXPR, gimple_assign_lhs (def_stmt));
3107 new_pattern_def_seq (stmt_vinfo, def_stmt);
3108 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3109 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3110 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3111 *type_in = vecitype;
3112 *type_out = vectype;
3114 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3116 return pattern_stmt;
3120 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3121 true if bool VAR can and should be optimized that way. Assume it shouldn't
3122 in case it's a result of a comparison which can be directly vectorized into
3123 a vector comparison. Fills in STMTS with all stmts visited during the
3124 walk. */
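/* Illustration: for

     a_b = x < y;
     b_b = p != q;
     VAR = a_b & b_b;

   the walk visits both comparisons and the BIT_AND_EXPR, adds them to
   STMTS, and returns true, provided the comparisons cannot be vectorized
   directly as vector comparisons and a VEC_COND_EXPR of a suitable
   integer type is supported.  */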
3126 static bool
3127 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3129 tree rhs1;
3130 enum tree_code rhs_code;
3132 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3133 if (!def_stmt_info)
3134 return false;
3136 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3137 if (!def_stmt)
3138 return false;
3140 if (stmts.contains (def_stmt))
3141 return true;
3143 rhs1 = gimple_assign_rhs1 (def_stmt);
3144 rhs_code = gimple_assign_rhs_code (def_stmt);
3145 switch (rhs_code)
3147 case SSA_NAME:
3148 if (! check_bool_pattern (rhs1, vinfo, stmts))
3149 return false;
3150 break;
3152 CASE_CONVERT:
3153 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3154 return false;
3155 if (! check_bool_pattern (rhs1, vinfo, stmts))
3156 return false;
3157 break;
3159 case BIT_NOT_EXPR:
3160 if (! check_bool_pattern (rhs1, vinfo, stmts))
3161 return false;
3162 break;
3164 case BIT_AND_EXPR:
3165 case BIT_IOR_EXPR:
3166 case BIT_XOR_EXPR:
3167 if (! check_bool_pattern (rhs1, vinfo, stmts)
3168 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3169 return false;
3170 break;
3172 default:
3173 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3175 tree vecitype, comp_vectype;
3177 /* If the comparison can throw, then is_gimple_condexpr will be
3178 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3179 if (stmt_could_throw_p (def_stmt))
3180 return false;
3182 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3183 if (comp_vectype == NULL_TREE)
3184 return false;
3186 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3187 if (mask_type
3188 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3189 return false;
3191 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3193 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3194 tree itype
3195 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3196 vecitype = get_vectype_for_scalar_type (itype);
3197 if (vecitype == NULL_TREE)
3198 return false;
3200 else
3201 vecitype = comp_vectype;
3202 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3203 return false;
3205 else
3206 return false;
3207 break;
3210 bool res = stmts.add (def_stmt);
3211 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3212 gcc_assert (!res);
3214 return true;
3218 /* Helper function of adjust_bool_pattern. Build a cast of VAR (the result
3219 of a previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast
3220 stmt to STMT_INFO's pattern sequence. Return the result of the cast. */
3222 static tree
3223 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3225 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3226 NOP_EXPR, var);
3227 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3228 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3229 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3230 append_pattern_def_seq (stmt_info, cast_stmt);
3231 return gimple_assign_lhs (cast_stmt);
3234 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3235 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3236 type, OUT_TYPE is the desired final integer type of the whole pattern.
3237 STMT_INFO is the info of the pattern root, with which the pattern stmts
3238 should be associated. DEFS is a map of pattern defs.
3240 static void
3241 adjust_bool_pattern (tree var, tree out_type,
3242 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3244 gimple *stmt = SSA_NAME_DEF_STMT (var);
3245 enum tree_code rhs_code, def_rhs_code;
3246 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3247 location_t loc;
3248 gimple *pattern_stmt, *def_stmt;
3249 tree trueval = NULL_TREE;
3251 rhs1 = gimple_assign_rhs1 (stmt);
3252 rhs2 = gimple_assign_rhs2 (stmt);
3253 rhs_code = gimple_assign_rhs_code (stmt);
3254 loc = gimple_location (stmt);
3255 switch (rhs_code)
3257 case SSA_NAME:
3258 CASE_CONVERT:
3259 irhs1 = *defs.get (rhs1);
3260 itype = TREE_TYPE (irhs1);
3261 pattern_stmt
3262 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3263 SSA_NAME, irhs1);
3264 break;
3266 case BIT_NOT_EXPR:
3267 irhs1 = *defs.get (rhs1);
3268 itype = TREE_TYPE (irhs1);
3269 pattern_stmt
3270 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3271 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3272 break;
3274 case BIT_AND_EXPR:
3275 /* Try to optimize x = y & (a < b ? 1 : 0); into
3276 x = (a < b ? y : 0);
3278 E.g. for:
3279 bool a_b, b_b, c_b;
3280 TYPE d_T;
3282 S1 a_b = x1 CMP1 y1;
3283 S2 b_b = x2 CMP2 y2;
3284 S3 c_b = a_b & b_b;
3285 S4 d_T = (TYPE) c_b;
3287 we would normally emit:
3289 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3290 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3291 S3' c_T = a_T & b_T;
3292 S4' d_T = c_T;
3294 but we can save one stmt by using the
3295 result of one of the COND_EXPRs in the other COND_EXPR and leave
3296 BIT_AND_EXPR stmt out:
3298 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3299 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3300 S4' f_T = c_T;
3302 At least when VEC_COND_EXPR is implemented using masks,
3303 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3304 computes the comparison mask and ANDs it, in one case with an
3305 all-ones vector, in the other case with a vector register.
3306 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3307 often more expensive. */
3308 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3309 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3310 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3312 irhs1 = *defs.get (rhs1);
3313 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3314 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3315 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3317 rhs_code = def_rhs_code;
3318 rhs1 = def_rhs1;
3319 rhs2 = gimple_assign_rhs2 (def_stmt);
3320 trueval = irhs1;
3321 goto do_compare;
3323 else
3324 irhs2 = *defs.get (rhs2);
3325 goto and_ior_xor;
3327 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3328 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3329 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3331 irhs2 = *defs.get (rhs2);
3332 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3333 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3334 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3336 rhs_code = def_rhs_code;
3337 rhs1 = def_rhs1;
3338 rhs2 = gimple_assign_rhs2 (def_stmt);
3339 trueval = irhs2;
3340 goto do_compare;
3342 else
3343 irhs1 = *defs.get (rhs1);
3344 goto and_ior_xor;
3346 /* FALLTHRU */
3347 case BIT_IOR_EXPR:
3348 case BIT_XOR_EXPR:
3349 irhs1 = *defs.get (rhs1);
3350 irhs2 = *defs.get (rhs2);
3351 and_ior_xor:
3352 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3353 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3355 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3356 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3357 int out_prec = TYPE_PRECISION (out_type);
3358 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3359 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3360 stmt_info);
3361 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3362 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3363 stmt_info);
3364 else
3366 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3367 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3370 itype = TREE_TYPE (irhs1);
3371 pattern_stmt
3372 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3373 rhs_code, irhs1, irhs2);
3374 break;
3376 default:
3377 do_compare:
3378 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3379 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3380 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3381 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3382 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3384 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3385 itype
3386 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3388 else
3389 itype = TREE_TYPE (rhs1);
3390 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3391 if (trueval == NULL_TREE)
3392 trueval = build_int_cst (itype, 1);
3393 else
3394 gcc_checking_assert (useless_type_conversion_p (itype,
3395 TREE_TYPE (trueval)));
3396 pattern_stmt
3397 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3398 COND_EXPR, cond_expr, trueval,
3399 build_int_cst (itype, 0));
3400 break;
3403 gimple_set_location (pattern_stmt, loc);
3404 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3405 pattern def seq stmts instead of just letting auto-detection do
3406 its work? */
3407 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3408 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3409 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3410 append_pattern_def_seq (stmt_info, pattern_stmt);
3411 defs.put (var, gimple_assign_lhs (pattern_stmt));
3414 /* Comparison function to qsort a vector of gimple stmts by UID. */
3416 static int
3417 sort_after_uid (const void *p1, const void *p2)
3419 const gimple *stmt1 = *(const gimple * const *)p1;
3420 const gimple *stmt2 = *(const gimple * const *)p2;
3421 return gimple_uid (stmt1) - gimple_uid (stmt2);
3424 /* Create pattern stmts for all stmts participating in the bool pattern
3425 specified by BOOL_STMT_SET and its root STMT with the desired type
3426 OUT_TYPE. Return the def of the pattern root. */
3428 static tree
3429 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3430 tree out_type, gimple *stmt)
3432 /* Gather original stmts in the bool pattern in their order of appearance
3433 in the IL. */
3434 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3435 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3436 i != bool_stmt_set.end (); ++i)
3437 bool_stmts.quick_push (*i);
3438 bool_stmts.qsort (sort_after_uid);
3440 /* Now process them in that order, producing pattern stmts. */
3441 hash_map <tree, tree> defs;
3442 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3443 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3444 out_type, vinfo_for_stmt (stmt), defs);
3446 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3447 gimple *pattern_stmt
3448 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3449 return gimple_assign_lhs (pattern_stmt);
3452 /* Helper for search_type_for_mask. */
3454 static tree
3455 search_type_for_mask_1 (tree var, vec_info *vinfo,
3456 hash_map<gimple *, tree> &cache)
3458 tree rhs1;
3459 enum tree_code rhs_code;
3460 tree res = NULL_TREE, res2;
3462 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3463 return NULL_TREE;
3465 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3466 if (!def_stmt_info)
3467 return NULL_TREE;
3469 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3470 if (!def_stmt)
3471 return NULL_TREE;
3473 tree *c = cache.get (def_stmt);
3474 if (c)
3475 return *c;
3477 rhs_code = gimple_assign_rhs_code (def_stmt);
3478 rhs1 = gimple_assign_rhs1 (def_stmt);
3480 switch (rhs_code)
3482 case SSA_NAME:
3483 case BIT_NOT_EXPR:
3484 CASE_CONVERT:
3485 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3486 break;
3488 case BIT_AND_EXPR:
3489 case BIT_IOR_EXPR:
3490 case BIT_XOR_EXPR:
3491 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3492 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3493 cache);
3494 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3495 res = res2;
3496 break;
3498 default:
3499 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3501 tree comp_vectype, mask_type;
3503 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3505 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3506 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3507 vinfo, cache);
3508 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3509 res = res2;
3510 break;
3513 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3514 if (comp_vectype == NULL_TREE)
3516 res = NULL_TREE;
3517 break;
3520 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3521 if (!mask_type
3522 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3524 res = NULL_TREE;
3525 break;
3528 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3529 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3531 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3532 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3534 else
3535 res = TREE_TYPE (rhs1);
3539 cache.put (def_stmt, res);
3540 return res;
3543 /* Return the proper type for converting bool VAR into
3544 an integer value or NULL_TREE if no such type exists.
3545 The type is chosen so that the converted value has the
3546 same number of elements as VAR's vector type. */
3548 static tree
3549 search_type_for_mask (tree var, vec_info *vinfo)
3551 hash_map<gimple *, tree> cache;
3552 return search_type_for_mask_1 (var, vinfo, cache);
3555 /* Function vect_recog_bool_pattern
3557 Try to find pattern like following:
3559 bool a_b, b_b, c_b, d_b, e_b;
3560 TYPE f_T;
3561 loop:
3562 S1 a_b = x1 CMP1 y1;
3563 S2 b_b = x2 CMP2 y2;
3564 S3 c_b = a_b & b_b;
3565 S4 d_b = x3 CMP3 y3;
3566 S5 e_b = c_b | d_b;
3567 S6 f_T = (TYPE) e_b;
3569 where type 'TYPE' is an integral type. Or a similar pattern
3570 ending in
3572 S6 f_Y = e_b ? r_Y : s_Y;
3574 as results from if-conversion of a complex condition.
3576 Input:
3578 * LAST_STMT: A stmt at the end from which the pattern
3579 search begins, i.e. cast of a bool to
3580 an integer type.
3582 Output:
3584 * TYPE_IN: The type of the input arguments to the pattern.
3586 * TYPE_OUT: The type of the output of this pattern.
3588 * Return value: A new stmt that will be used to replace the pattern.
3590 Assuming the size of TYPE is the same as the size of all comparisons
3591 (otherwise some casts would be added where needed), for the above
3592 sequence we create the following related pattern stmts:
3593 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3594 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3595 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3596 S5' e_T = c_T | d_T;
3597 S6' f_T = e_T;
3599 Instead of the above S3' we could emit:
3600 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3601 S3' c_T = a_T | b_T;
3602 but the above is more efficient. */
3604 static gimple *
3605 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3606 tree *type_out)
3608 gimple *last_stmt = stmts->pop ();
3609 enum tree_code rhs_code;
3610 tree var, lhs, rhs, vectype;
3611 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3612 stmt_vec_info new_stmt_info;
3613 vec_info *vinfo = stmt_vinfo->vinfo;
3614 gimple *pattern_stmt;
3616 if (!is_gimple_assign (last_stmt))
3617 return NULL;
3619 var = gimple_assign_rhs1 (last_stmt);
3620 lhs = gimple_assign_lhs (last_stmt);
3622 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3623 return NULL;
3625 hash_set<gimple *> bool_stmts;
3627 rhs_code = gimple_assign_rhs_code (last_stmt);
3628 if (CONVERT_EXPR_CODE_P (rhs_code))
3630 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3631 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3632 return NULL;
3633 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3634 if (vectype == NULL_TREE)
3635 return NULL;
3637 if (check_bool_pattern (var, vinfo, bool_stmts))
3639 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3640 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3641 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3642 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3643 else
3644 pattern_stmt
3645 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3647 else
3649 tree type = search_type_for_mask (var, vinfo);
3650 tree cst0, cst1, tmp;
3652 if (!type)
3653 return NULL;
3655 /* We may use the cond directly with a narrowed type, to avoid
3656 multiple cond exprs followed by result packing, and instead
3657 perform a single cond with a packed mask. In the widening
3658 case it is better to do the cond first and then extract the
3659 results. */
3660 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3661 type = TREE_TYPE (lhs);
3663 cst0 = build_int_cst (type, 0);
3664 cst1 = build_int_cst (type, 1);
3665 tmp = vect_recog_temp_ssa_var (type, NULL);
3666 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3668 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3670 tree new_vectype = get_vectype_for_scalar_type (type);
3671 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3672 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3673 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3674 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3676 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3677 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3681 *type_out = vectype;
3682 *type_in = vectype;
3683 stmts->safe_push (last_stmt);
3684 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3686 return pattern_stmt;
3688 else if (rhs_code == COND_EXPR
3689 && TREE_CODE (var) == SSA_NAME)
3691 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3692 if (vectype == NULL_TREE)
3693 return NULL;
3695 /* Build a scalar type for the boolean result that when
3696 vectorized matches the vector type of the result in
3697 size and number of elements. */
3698 unsigned prec
3699 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3700 TYPE_VECTOR_SUBPARTS (vectype));
3702 tree type
3703 = build_nonstandard_integer_type (prec,
3704 TYPE_UNSIGNED (TREE_TYPE (var)));
3705 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3706 return NULL;
3708 if (!check_bool_pattern (var, vinfo, bool_stmts))
3709 return NULL;
3711 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3713 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3714 pattern_stmt
3715 = gimple_build_assign (lhs, COND_EXPR,
3716 build2 (NE_EXPR, boolean_type_node,
3717 rhs, build_int_cst (type, 0)),
3718 gimple_assign_rhs2 (last_stmt),
3719 gimple_assign_rhs3 (last_stmt));
3720 *type_out = vectype;
3721 *type_in = vectype;
3722 stmts->safe_push (last_stmt);
3723 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3725 return pattern_stmt;
3727 else if (rhs_code == SSA_NAME
3728 && STMT_VINFO_DATA_REF (stmt_vinfo))
3730 stmt_vec_info pattern_stmt_info;
3731 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3732 gcc_assert (vectype != NULL_TREE);
3733 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3734 return NULL;
3736 if (check_bool_pattern (var, vinfo, bool_stmts))
3737 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3738 else
3740 tree type = search_type_for_mask (var, vinfo);
3741 tree cst0, cst1, new_vectype;
3743 if (!type)
3744 return NULL;
3746 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3747 type = TREE_TYPE (vectype);
3749 cst0 = build_int_cst (type, 0);
3750 cst1 = build_int_cst (type, 1);
3751 new_vectype = get_vectype_for_scalar_type (type);
3753 rhs = vect_recog_temp_ssa_var (type, NULL);
3754 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3756 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3757 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3758 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3759 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3762 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3763 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3765 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3766 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3767 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3768 rhs = rhs2;
3770 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3771 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3772 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3773 STMT_VINFO_DATA_REF (pattern_stmt_info)
3774 = STMT_VINFO_DATA_REF (stmt_vinfo);
3775 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3776 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3777 *type_out = vectype;
3778 *type_in = vectype;
3779 stmts->safe_push (last_stmt);
3780 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3782 return pattern_stmt;
3784 else
3785 return NULL;
3789 /* A helper for vect_recog_mask_conversion_pattern.  Build a
3790 conversion of MASK to a type suitable for masking VECTYPE.
3791 The built statement is given the required vectype and is appended
3792 to STMT_VINFO's pattern definition sequence.
3794 Return the converted mask.  */
3796 static tree
3797 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3798 vec_info *vinfo)
3800 gimple *stmt;
3801 tree masktype, tmp;
3802 stmt_vec_info new_stmt_info;
3804 masktype = build_same_sized_truth_vector_type (vectype);
3805 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3806 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3807 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3808 set_vinfo_for_stmt (stmt, new_stmt_info);
3809 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3810 append_pattern_def_seq (stmt_vinfo, stmt);
3812 return tmp;
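/* For example (illustrative only): if VECTYPE is a vector of ints and
   MASK currently has a mask type with a different number of elements,
   the helper emits something along the lines of

     tmp_1 = (<boolean mask type matching VECTYPE>) mask_2;

   gives that statement the boolean vector type built by
   build_same_sized_truth_vector_type, and appends it to STMT_VINFO's
   pattern definition sequence.  */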
3816 /* Function vect_recog_mask_conversion_pattern
3818 Try to find statements which require boolean type
3819 conversion.  Additional conversion statements are
3820 added to handle such cases. For example:
3822 bool m_1, m_2, m_3;
3823 int i_4, i_5;
3824 double d_6, d_7;
3825 char c_1, c_2, c_3;
3827 S1 m_1 = i_4 > i_5;
3828 S2 m_2 = d_6 < d_7;
3829 S3 m_3 = m_1 & m_2;
3830 S4 c_1 = m_3 ? c_2 : c_3;
3832 Will be transformed into:
3834 S1 m_1 = i_4 > i_5;
3835 S2 m_2 = d_6 < d_7;
3836 S3'' m_2' = (_Bool[bitsize=32])m_2
3837 S3' m_3' = m_1 & m_2';
3838 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3839 S4' c_1' = m_3'' ? c_2 : c_3; */
3841 static gimple *
3842 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3843 tree *type_out)
3845 gimple *last_stmt = stmts->pop ();
3846 enum tree_code rhs_code;
3847 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3848 tree vectype1, vectype2;
3849 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3850 stmt_vec_info pattern_stmt_info;
3851 vec_info *vinfo = stmt_vinfo->vinfo;
3853 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3854 if (is_gimple_call (last_stmt)
3855 && gimple_call_internal_p (last_stmt)
3856 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3857 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3859 gcall *pattern_stmt;
3860 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3862 if (load)
3864 lhs = gimple_call_lhs (last_stmt);
3865 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3867 else
3869 rhs2 = gimple_call_arg (last_stmt, 3);
3870 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3873 rhs1 = gimple_call_arg (last_stmt, 2);
3874 rhs1_type = search_type_for_mask (rhs1, vinfo);
3875 if (!rhs1_type)
3876 return NULL;
3877 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3879 if (!vectype1 || !vectype2
3880 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3881 TYPE_VECTOR_SUBPARTS (vectype2)))
3882 return NULL;
3884 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3886 if (load)
3888 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3889 pattern_stmt
3890 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3891 gimple_call_arg (last_stmt, 0),
3892 gimple_call_arg (last_stmt, 1),
3893 tmp);
3894 gimple_call_set_lhs (pattern_stmt, lhs);
3896 else
3897 pattern_stmt
3898 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3899 gimple_call_arg (last_stmt, 0),
3900 gimple_call_arg (last_stmt, 1),
3901 tmp,
3902 gimple_call_arg (last_stmt, 3));
3904 gimple_call_set_nothrow (pattern_stmt, true);
3906 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3907 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3908 STMT_VINFO_DATA_REF (pattern_stmt_info)
3909 = STMT_VINFO_DATA_REF (stmt_vinfo);
3910 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3911 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3913 *type_out = vectype1;
3914 *type_in = vectype1;
3915 stmts->safe_push (last_stmt);
3916 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3918 return pattern_stmt;
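/* Rough sketch of the masked-load case above (types, names and argument
   layout are illustrative): a MASK_LOAD of int elements whose mask m_1
   was computed from comparisons of doubles has mismatched mask lengths,
   so the pattern becomes approximately

     m_1' = (<mask type for int elements>) m_1;
     lhs' = .MASK_LOAD (ptr, align, m_1');

   with the converted mask having as many elements as the loaded
   vector.  The MASK_STORE case is handled analogously.  */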
3921 if (!is_gimple_assign (last_stmt))
3922 return NULL;
3924 gimple *pattern_stmt;
3925 lhs = gimple_assign_lhs (last_stmt);
3926 rhs1 = gimple_assign_rhs1 (last_stmt);
3927 rhs_code = gimple_assign_rhs_code (last_stmt);
3929 /* Check for cond expression requiring mask conversion. */
3930 if (rhs_code == COND_EXPR)
3932 /* vect_recog_mixed_size_cond_pattern could apply.
3933 Do nothing then. */
3934 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3935 return NULL;
3937 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3939 if (TREE_CODE (rhs1) == SSA_NAME)
3941 rhs1_type = search_type_for_mask (rhs1, vinfo);
3942 if (!rhs1_type)
3943 return NULL;
3945 else if (COMPARISON_CLASS_P (rhs1))
3947 /* Check whether we're comparing scalar booleans and (if so)
3948 whether a better mask type exists than the mask associated
3949 with boolean-sized elements. This avoids unnecessary packs
3950 and unpacks if the booleans are set from comparisons of
3951 wider types. E.g. in:
3953 int x1, x2, x3, x4, y1, y2;
3955 bool b1 = (x1 == x2);
3956 bool b2 = (x3 == x4);
3957 ... = b1 == b2 ? y1 : y2;
3959 it is better for b1 and b2 to use the mask type associated
3960 with int elements rather than bool (byte) elements.  */
3961 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3962 if (!rhs1_type)
3963 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3965 else
3966 return NULL;
3968 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3970 if (!vectype1 || !vectype2)
3971 return NULL;
3973 /* Continue if a conversion is needed. Also continue if we have
3974 a comparison whose vector type would normally be different from
3975 VECTYPE2 when considered in isolation. In that case we'll
3976 replace the comparison with an SSA name (so that we can record
3977 its vector type) and behave as though the comparison was an SSA
3978 name from the outset. */
3979 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3980 TYPE_VECTOR_SUBPARTS (vectype2))
3981 && (TREE_CODE (rhs1) == SSA_NAME
3982 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
3983 return NULL;
3985 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
3986 in place; we can handle it in vectorizable_condition.  This avoids
3987 unnecessary promotion stmts and an increased vectorization factor.  */
3988 if (COMPARISON_CLASS_P (rhs1)
3989 && INTEGRAL_TYPE_P (rhs1_type)
3990 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
3991 TYPE_VECTOR_SUBPARTS (vectype2)))
3993 gimple *dummy;
3994 enum vect_def_type dt;
3995 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
3996 &dummy, &dt)
3997 && dt == vect_external_def
3998 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
3999 &dummy, &dt)
4000 && (dt == vect_external_def
4001 || dt == vect_constant_def))
4003 tree wide_scalar_type = build_nonstandard_integer_type
4004 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4005 TYPE_UNSIGNED (rhs1_type));
4006 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4007 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4008 return NULL;
4012 /* If rhs1 is a comparison we need to move it into a
4013 separate statement. */
4014 if (TREE_CODE (rhs1) != SSA_NAME)
4016 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4017 pattern_stmt = gimple_build_assign (tmp, rhs1);
4018 rhs1 = tmp;
4020 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4021 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4022 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4023 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4026 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4027 TYPE_VECTOR_SUBPARTS (vectype2)))
4028 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4029 else
4030 tmp = rhs1;
4032 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4033 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4034 gimple_assign_rhs2 (last_stmt),
4035 gimple_assign_rhs3 (last_stmt));
4037 *type_out = vectype1;
4038 *type_in = vectype1;
4039 stmts->safe_push (last_stmt);
4040 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4042 return pattern_stmt;
4045 /* Now check for binary boolean operations requiring conversion for
4046 one of the operands.  */
4047 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4048 return NULL;
4050 if (rhs_code != BIT_IOR_EXPR
4051 && rhs_code != BIT_XOR_EXPR
4052 && rhs_code != BIT_AND_EXPR
4053 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4054 return NULL;
4056 rhs2 = gimple_assign_rhs2 (last_stmt);
4058 rhs1_type = search_type_for_mask (rhs1, vinfo);
4059 rhs2_type = search_type_for_mask (rhs2, vinfo);
4061 if (!rhs1_type || !rhs2_type
4062 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4063 return NULL;
4065 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4067 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4068 if (!vectype1)
4069 return NULL;
4070 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4072 else
4074 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4075 if (!vectype1)
4076 return NULL;
4077 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4080 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4081 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4083 *type_out = vectype1;
4084 *type_in = vectype1;
4085 stmts->safe_push (last_stmt);
4086 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4088 return pattern_stmt;
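/* Illustrative example for the binary boolean case above (names are
   invented):

     m_1 = c_2 < c_3;    char comparison, mask with narrow elements
     m_4 = i_5 < i_6;    int comparison, mask with wide elements
     m_7 = m_1 & m_4;

   becomes

     m_4' = (<mask type of m_1>) m_4;
     m_7' = m_1 & m_4';

   i.e. the operand whose mask scalar type has the wider precision is
   converted to the mask type of the narrower one.  */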
4091 /* STMT is a load or store. If the load or store is conditional, return
4092 the boolean condition under which it occurs, otherwise return null. */
4094 static tree
4095 vect_get_load_store_mask (gimple *stmt)
4097 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4099 gcc_assert (gimple_assign_single_p (def_assign));
4100 return NULL_TREE;
4103 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4105 internal_fn ifn = gimple_call_internal_fn (def_call);
4106 int mask_index = internal_fn_mask_index (ifn);
4107 return gimple_call_arg (def_call, mask_index);
4110 gcc_unreachable ();
4113 /* Return the scalar offset type that an internal gather/scatter function
4114 should use. GS_INFO describes the gather/scatter operation. */
4116 static tree
4117 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4119 tree offset_type = TREE_TYPE (gs_info->offset);
4120 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4122 /* Enforced by vect_check_gather_scatter. */
4123 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4124 gcc_assert (element_bits >= offset_bits);
4126 /* If the offset is narrower than the elements, extend it according
4127 to its sign. */
4128 if (element_bits > offset_bits)
4129 return build_nonstandard_integer_type (element_bits,
4130 TYPE_UNSIGNED (offset_type));
4132 return offset_type;
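/* For example (illustrative): with 64-bit gather/scatter elements and a
   32-bit unsigned offset, the function returns a 64-bit unsigned integer
   type, so that the offset can be widened to the element width; if the
   offset already has the element width it is returned unchanged.  */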
4135 /* Return MASK if MASK is suitable for masking an operation on vectors
4136 of type VECTYPE, otherwise convert it into such a form and return
4137 the result. Associate any conversion statements with STMT_INFO's
4138 pattern. */
4140 static tree
4141 vect_convert_mask_for_vectype (tree mask, tree vectype,
4142 stmt_vec_info stmt_info, vec_info *vinfo)
4144 tree mask_type = search_type_for_mask (mask, vinfo);
4145 if (mask_type)
4147 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4148 if (mask_vectype
4149 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4150 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4151 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4153 return mask;
4156 /* Return the equivalent of:
4158 fold_convert (TYPE, VALUE)
4160 with the expectation that the operation will be vectorized.
4161 If new statements are needed, add them as pattern statements
4162 to STMT_INFO. */
4164 static tree
4165 vect_add_conversion_to_patterm (tree type, tree value,
4166 stmt_vec_info stmt_info,
4167 vec_info *vinfo)
4169 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4170 return value;
4172 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4173 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4174 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4175 set_vinfo_for_stmt (conversion, new_stmt_info);
4176 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4177 append_pattern_def_seq (stmt_info, conversion);
4178 return new_value;
4181 /* Try to convert STMT into a call to a gather load or scatter store
4182 internal function. Return the final statement on success and set
4183 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4185 This function only handles gathers and scatters that were recognized
4186 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4188 static gimple *
4189 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4190 tree *type_in, tree *type_out)
4192 /* Currently we only support this for loop vectorization. */
4193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4194 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4195 if (!loop_vinfo)
4196 return NULL;
4198 /* Make sure that we're looking at a gather load or scatter store. */
4199 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4200 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4201 return NULL;
4203 /* Get the boolean that controls whether the load or store happens.
4204 This is null if the operation is unconditional. */
4205 tree mask = vect_get_load_store_mask (stmt);
4207 /* Make sure that the target supports an appropriate internal
4208 function for the gather/scatter operation. */
4209 gather_scatter_info gs_info;
4210 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4211 || gs_info.decl)
4212 return NULL;
4214 /* Convert the mask to the right form. */
4215 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4216 if (mask)
4217 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4218 loop_vinfo);
4220 /* Get the invariant base and non-invariant offset, converting the
4221 latter to the same width as the vector elements. */
4222 tree base = gs_info.base;
4223 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4224 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4225 last_stmt_info, loop_vinfo);
4227 /* Build the new pattern statement. */
4228 tree scale = size_int (gs_info.scale);
4229 gcall *pattern_stmt;
4230 if (DR_IS_READ (dr))
4232 if (mask != NULL)
4233 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4234 offset, scale, mask);
4235 else
4236 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4237 offset, scale);
4238 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4239 gimple_call_set_lhs (pattern_stmt, load_lhs);
4241 else
4243 tree rhs = vect_get_store_rhs (stmt);
4244 if (mask != NULL)
4245 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4246 base, offset, scale, rhs,
4247 mask);
4248 else
4249 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4250 base, offset, scale, rhs);
4252 gimple_call_set_nothrow (pattern_stmt, true);
4254 /* Copy across relevant vectorization info and associate DR with the
4255 new pattern statement instead of the original statement. */
4256 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4257 loop_vinfo);
4258 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4259 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4260 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4261 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4262 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4263 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4265 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4266 *type_out = vectype;
4267 *type_in = vectype;
4268 vect_pattern_detected ("gather/scatter pattern", stmt);
4270 return pattern_stmt;
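/* Rough sketch of the result (SSA names and dump syntax are illustrative):
   an unconditional gather load such as

     x_1 = a[b_2];      recognized via STMT_VINFO_GATHER_SCATTER_P

   is replaced by a pattern call along the lines of

     x_1' = .GATHER_LOAD (base, b_2', scale);

   where b_2' is the offset converted to the element width; a masked
   variant additionally passes the load/store mask, and scatter stores
   use IFN_SCATTER_STORE / IFN_MASK_SCATTER_STORE with the stored rhs.  */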
4273 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4275 static gimple *
4276 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4277 tree *type_out)
4279 gimple *last_stmt = stmts->pop ();
4280 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4281 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4282 last_stmt_info,
4283 type_in, type_out);
4284 if (pattern_stmt)
4285 stmts->safe_push (last_stmt);
4286 return pattern_stmt;
4289 /* Mark statements that are involved in a pattern. */
4291 static inline void
4292 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4293 tree pattern_vectype)
4295 stmt_vec_info pattern_stmt_info, def_stmt_info;
4296 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4297 vec_info *vinfo = orig_stmt_info->vinfo;
4298 gimple *def_stmt;
4300 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4301 if (pattern_stmt_info == NULL)
4303 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4304 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4306 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4308 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4309 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4310 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4311 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4312 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4313 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4314 if (gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info))
4315 for (gimple_stmt_iterator si = gsi_start (def_seq);
4316 !gsi_end_p (si); gsi_next (&si))
4318 def_stmt = gsi_stmt (si);
4319 def_stmt_info = vinfo_for_stmt (def_stmt);
4320 if (def_stmt_info == NULL)
4322 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4323 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4325 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4326 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4327 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4328 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4329 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4333 /* Function vect_pattern_recog_1
4335 Input:
4336 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4337 computation pattern.
4338 STMT: A stmt from which the pattern search should start.
4340 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4341 expression that computes the same functionality and can be used to
4342 replace the sequence of stmts that are involved in the pattern.
4344 Output:
4345 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4346 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4347 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4348 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4349 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4350 the available target pattern.
4352 This function also does some bookkeeping, as explained in the documentation
4353 for vect_pattern_recog.  */
4355 static bool
4356 vect_pattern_recog_1 (vect_recog_func *recog_func,
4357 gimple_stmt_iterator si,
4358 vec<gimple *> *stmts_to_replace)
4360 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4361 stmt_vec_info stmt_info;
4362 loop_vec_info loop_vinfo;
4363 tree pattern_vectype;
4364 tree type_in, type_out;
4365 enum tree_code code;
4366 int i;
4368 stmts_to_replace->truncate (0);
4369 stmts_to_replace->quick_push (stmt);
4370 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4371 if (!pattern_stmt)
4373 /* Clear the related stmt info that analysis might have noted
4374 for the stmts that are to be replaced.  */
4375 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4376 && (unsigned) i < stmts_to_replace->length ();
4377 i++)
4379 stmt_info = vinfo_for_stmt (stmt);
4380 STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
4382 return false;
4385 stmt = stmts_to_replace->last ();
4386 stmt_info = vinfo_for_stmt (stmt);
4387 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4389 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4390 || VECTOR_TYPE_P (type_in))
4392 /* No need to check target support (already checked by the pattern
4393 recognition function). */
4394 pattern_vectype = type_out ? type_out : type_in;
4396 else
4398 /* Check target support */
4399 type_in = get_vectype_for_scalar_type (type_in);
4400 if (!type_in)
4401 return false;
4402 if (type_out)
4403 type_out = get_vectype_for_scalar_type (type_out);
4404 else
4405 type_out = type_in;
4406 if (!type_out)
4407 return false;
4408 pattern_vectype = type_out;
4410 if (is_gimple_assign (pattern_stmt))
4412 enum insn_code icode;
4413 code = gimple_assign_rhs_code (pattern_stmt);
4414 optab optab = optab_for_tree_code (code, type_in, optab_default);
4415 machine_mode vec_mode = TYPE_MODE (type_in);
4416 if (!optab
4417 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4418 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4419 return false;
4421 else
4422 gcc_assert (is_gimple_call (pattern_stmt));
4425 /* Found a vectorizable pattern. */
4426 if (dump_enabled_p ())
4428 dump_printf_loc (MSG_NOTE, vect_location,
4429 "%s pattern recognized: ", recog_func->name);
4430 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4433 /* Mark the stmts that are involved in the pattern. */
4434 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4436 /* Patterns cannot be vectorized using SLP, because they change the order of
4437 computation. */
4438 if (loop_vinfo)
4440 unsigned ix, ix2;
4441 gimple **elem_ptr;
4442 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4443 elem_ptr, *elem_ptr == stmt);
4446 /* It is possible that additional pattern stmts are created and inserted in
4447 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4448 relevant statements. */
4449 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4450 && (unsigned) i < (stmts_to_replace->length () - 1);
4451 i++)
4453 stmt_info = vinfo_for_stmt (stmt);
4454 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4455 if (dump_enabled_p ())
4457 dump_printf_loc (MSG_NOTE, vect_location,
4458 "additional pattern stmt: ");
4459 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4462 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4465 return true;
4469 /* Function vect_pattern_recog
4471 Input:
4472 VINFO - a loop or basic-block vectorization info structure in which we
4473 want to look for computation idioms.
4475 Output - for each computation idiom that is detected we create a new stmt
4476 that provides the same functionality and that can be vectorized. We
4477 also record some information in the struct_stmt_info of the relevant
4478 stmts, as explained below:
4480 At the entry to this function we have the following stmts, with the
4481 following initial value in the STMT_VINFO fields:
4483 stmt in_pattern_p related_stmt vec_stmt
4484 S1: a_i = .... - - -
4485 S2: a_2 = ..use(a_i).. - - -
4486 S3: a_1 = ..use(a_2).. - - -
4487 S4: a_0 = ..use(a_1).. - - -
4488 S5: ... = ..use(a_0).. - - -
4490 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4491 represented by a single stmt. We then:
4492 - create a new stmt S6 equivalent to the pattern (the stmt is not
4493 inserted into the code)
4494 - fill in the STMT_VINFO fields as follows:
4496 in_pattern_p related_stmt vec_stmt
4497 S1: a_i = .... - - -
4498 S2: a_2 = ..use(a_i).. - - -
4499 S3: a_1 = ..use(a_2).. - - -
4500 S4: a_0 = ..use(a_1).. true S6 -
4501 '---> S6: a_new = .... - S4 -
4502 S5: ... = ..use(a_0).. - - -
4504 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4505 to each other through the RELATED_STMT field).
4507 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4508 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4509 remain irrelevant unless used by stmts other than S4.
4511 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4512 (because they are marked as irrelevant). It will vectorize S6, and record
4513 a pointer to the new vector stmt VS6 from S6 (as usual).
4514 S4 will be skipped, and S5 will be vectorized as usual:
4516 in_pattern_p related_stmt vec_stmt
4517 S1: a_i = .... - - -
4518 S2: a_2 = ..use(a_i).. - - -
4519 S3: a_1 = ..use(a_2).. - - -
4520 > VS6: va_new = .... - - -
4521 S4: a_0 = ..use(a_1).. true S6 VS6
4522 '---> S6: a_new = .... - S4 VS6
4523 > VS5: ... = ..vuse(va_new).. - - -
4524 S5: ... = ..use(a_0).. - - -
4526 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4527 elsewhere), and we'll end up with:
4529 VS6: va_new = ....
4530 VS5: ... = ..vuse(va_new)..
4532 In case of more than one pattern statements, e.g., widen-mult with
4533 intermediate type:
4535 S1 a_t = ;
4536 S2 a_T = (TYPE) a_t;
4537 '--> S3: a_it = (interm_type) a_t;
4538 S4 prod_T = a_T * CONST;
4539 '--> S5: prod_T' = a_it w* CONST;
4541 there may be other users of a_T outside the pattern. In that case S2 will
4542 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4543 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4544 be recorded in S3. */
4546 void
4547 vect_pattern_recog (vec_info *vinfo)
4549 struct loop *loop;
4550 basic_block *bbs;
4551 unsigned int nbbs;
4552 gimple_stmt_iterator si;
4553 unsigned int i, j;
4554 auto_vec<gimple *, 1> stmts_to_replace;
4556 DUMP_VECT_SCOPE ("vect_pattern_recog");
4558 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4560 loop = LOOP_VINFO_LOOP (loop_vinfo);
4561 bbs = LOOP_VINFO_BBS (loop_vinfo);
4562 nbbs = loop->num_nodes;
4564 /* Scan through the loop stmts, applying the pattern recognition
4565 functions starting at each stmt visited: */
4566 for (i = 0; i < nbbs; i++)
4568 basic_block bb = bbs[i];
4569 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4571 gimple *stmt = gsi_stmt (si);
4572 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4573 if (stmt_info && STMT_VINFO_IN_PATTERN_P (stmt_info))
4574 continue;
4575 /* Scan over all generic vect_recog_xxx_pattern functions. */
4576 for (j = 0; j < NUM_PATTERNS; j++)
4577 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4578 &stmts_to_replace))
4579 break;
4583 else
4585 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4586 for (si = bb_vinfo->region_begin;
4587 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4589 gimple *stmt = gsi_stmt (si);
4590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4591 if (stmt_info
4592 && (!STMT_VINFO_VECTORIZABLE (stmt_info)
4593 || STMT_VINFO_IN_PATTERN_P (stmt_info)))
4594 continue;
4596 /* Scan over all generic vect_recog_xxx_pattern functions. */
4597 for (j = 0; j < NUM_PATTERNS; j++)
4598 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4599 &stmts_to_replace))
4600 break;