/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"

/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt
   is taken, which means usually the more complex one needs to precede
   the less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_over_widening_pattern, "over_widening" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
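
/* Illustrative example (hypothetical, not from the original sources):
   given source code like

       unsigned char c;
       unsigned int i = (unsigned int) c;

   the cast defining 'i' is a promotion: the precision of 'unsigned int'
   is at least double that of 'unsigned char', so *PROMOTION is set to
   true.  A cast the other way, say '(unsigned char) i', is still a valid
   conversion but leaves *PROMOTION false.  */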

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
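
/* Illustrative example (hypothetical, not from the original sources):
   with 'short' input arrays A and B, a reduction loop such as

       short a[N], b[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
	 sum += (int) a[i] * (int) b[i];

   matches the pattern above: the loads are S1/S2, the casts to 'int'
   are S3/S4 (TYPE1 is twice the size of 'short'), the multiply is S5
   and the accumulation is S7, so the loop body can be replaced by a
   single DOT_PROD_EXPR <a[i], b[i], sum_0>.  */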

static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
	return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
	{
	  stmt = def_stmt;
	  oprnd0 = gimple_assign_rhs1 (stmt);
	}
      else
	half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
	return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
	return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
	return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
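
/* Illustrative example (hypothetical, not from the original sources):
   a motion-estimation style loop over 'unsigned char' arrays,

       unsigned char a[N], b[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
	 {
	   int diff = (int) a[i] - (int) b[i];
	   sum += diff < 0 ? -diff : diff;
	 }

   where the conditional negation gimplifies to an ABS_EXPR, is the kind
   of input this recognizer turns into a single
   SAD_EXPR <a[i], b[i], sum_0>.  */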

static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
	return NULL;

      /* The type conversion could be promotion, demotion,
	 or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
			     &half_type, &def_stmt, &promotion))
	plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
	half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
			  &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
			  &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
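
/* Illustrative example (hypothetical, not from the original sources):
   assuming 'a' is an 'unsigned char' variable,

       unsigned char a;
       unsigned int res = a << 9;

   the shift amount 9 does not fit the 8-bit half type, but 'unsigned int'
   is 4 times wider than 'unsigned char', so we can promote to the 16-bit
   intermediate type instead, i.e. rewrite

       a_T = (unsigned int) a;   into   a_it = (unsigned short) a;

   and perform a widening shift from 'unsigned short' to 'unsigned int'.  */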

static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
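
/* Illustrative example (hypothetical, not from the original sources):
   with 'short' input arrays A, B and an 'int' output array C,

       short a[N], b[N];
       int c[N];
       for (int i = 0; i < N; i++)
	 c[i] = (int) a[i] * (int) b[i];

   both multiplication operands are promotions from 'short', so the
   cast-and-multiply sequence is replaced with a single
   WIDEN_MULT_EXPR <a[i], b[i]>, letting the target use a widening
   multiply instruction.  */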

static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
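
/* Illustrative example (hypothetical, not from the original sources):

       double y = pow (x, 2.0);    becomes   y = x * x
       double z = pow (x, 0.5);    becomes   z = sqrt (x)

   since plain multiplication vectorizes directly and sqrt goes through
   IFN_SQRT when the target supports it, whereas a general pow () call
   usually would not vectorize.  */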

static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  return stmt;
	}
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
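
/* Illustrative example (hypothetical, not from the original sources):
   summing a 'short' array into an 'int' accumulator,

       short a[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
	 sum += (int) a[i];

   matches this pattern, and the cast-plus-add pair is replaced by
   WIDEN_SUM_EXPR <a[i], sum_0>.  */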

static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
      && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be created
	 in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			  TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', which can
   either be 'type' or some intermediate type.  For now, we expect S5 to be a
   type demotion operation.  We also check that S3 and S4 have only one use.  */
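
/* Illustrative example (hypothetical, not from the original sources):
   with 'unsigned char' source and destination arrays,

       unsigned char a[N], b[N];
       for (int i = 0; i < N; i++)
	 b[i] = ((a[i] << 2) | 3) >> 1;

   C integer promotion performs the shifts and OR in 'int', but since the
   result is immediately demoted back to 'unsigned char', the operations
   can be done in a narrower type (here 'unsigned char' or an intermediate
   'unsigned short'), allowing more elements per vector.  */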

static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
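
/* Illustrative example (hypothetical, not from the original sources):
   with a 'short' input array A and an 'int' output array B,

       short a[N];
       int b[N];
       for (int i = 0; i < N; i++)
	 b[i] = (int) a[i] << 8;

   the promotion and shift pair is collapsed into a single
   WIDEN_LSHIFT_EXPR <a[i], 8>, producing 'int' results directly from
   'short' inputs.  */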

static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt
    = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

  where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
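
/* Illustrative example (hypothetical, not from the original sources):
   a byte-wise rotate written as

       unsigned char x, n;
       unsigned char r = (x << n) | (x >> ((-n) & 7));

   is folded earlier into 'x r<< n'.  When the target has no vector
   rotate, the expansion above (negate, mask with B - 1 = 7, two shifts,
   OR) is emitted instead, which only needs vector shift and bitwise
   instructions.  */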

static gimple *
vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && is_a <loop_vec_info> (vinfo))
    {
      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1960 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1961 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1962 append_pattern_def_seq (stmt_vinfo, def_stmt);
1966 var1 = vect_recog_temp_ssa_var (type, NULL);
1967 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1968 ? LSHIFT_EXPR : RSHIFT_EXPR,
1969 oprnd0, def);
1970 append_pattern_def_seq (stmt_vinfo, def_stmt);
1972 var2 = vect_recog_temp_ssa_var (type, NULL);
1973 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1974 ? RSHIFT_EXPR : LSHIFT_EXPR,
1975 oprnd0, def2);
1976 append_pattern_def_seq (stmt_vinfo, def_stmt);
1978 /* Pattern detected. */
1979 if (dump_enabled_p ())
1980 dump_printf_loc (MSG_NOTE, vect_location,
1981 "vect_recog_rotate_pattern: detected:\n");
1983 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1984 var = vect_recog_temp_ssa_var (type, NULL);
1985 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1987 if (dump_enabled_p ())
1988 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1990 stmts->safe_push (last_stmt);
1991 return pattern_stmt;
1994 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1995 vectorized:
1997 type a_t;
1998 TYPE b_T, res_T;
2000 S1 a_t = ;
2001 S2 b_T = ;
2002 S3 res_T = b_T op a_t;
2004 where type 'TYPE' is a type with a different size from 'type',
2005 and op is <<, >> or rotate.
2007 Also detect cases:
2009 type a_t;
2010 TYPE b_T, c_T, res_T;
2012 S0 c_T = ;
2013 S1 a_t = (type) c_T;
2014 S2 b_T = ;
2015 S3 res_T = b_T op a_t;
2017 Input/Output:
2019 * STMTS: Contains a stmt from which the pattern search begins,
2020 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2021 with a shift/rotate which has the same type on both operands; in
2022 the second case just b_T op c_T, in the first case with an added
2023 cast from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2025 Output:
2027 * TYPE_IN: The type of the input arguments to the pattern.
2029 * TYPE_OUT: The type of the output of this pattern.
2031 * Return value: A new stmt that will be used to replace the shift/rotate
2032 S3 stmt. */
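/* Editorial illustration (not part of the original source): with
   type == unsigned char and TYPE == unsigned int the first form is

     unsigned char a_t = ...;
     unsigned int b_T = ...;
     unsigned int res_T = b_T >> a_t;

   A vector/vector shift needs both operands in the same vector type,
   so the pattern emits a_T = (unsigned int) a_t into the pattern def
   seq and rewrites S3 as res_T = b_T >> a_T; in the second form the
   existing c_T can be reused and no new cast is needed.  */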
2034 static gimple *
2035 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2036 tree *type_in, tree *type_out)
2038 gimple *last_stmt = stmts->pop ();
2039 tree oprnd0, oprnd1, lhs, var;
2040 gimple *pattern_stmt, *def_stmt;
2041 enum tree_code rhs_code;
2042 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2043 vec_info *vinfo = stmt_vinfo->vinfo;
2044 enum vect_def_type dt;
2046 if (!is_gimple_assign (last_stmt))
2047 return NULL;
2049 rhs_code = gimple_assign_rhs_code (last_stmt);
2050 switch (rhs_code)
2052 case LSHIFT_EXPR:
2053 case RSHIFT_EXPR:
2054 case LROTATE_EXPR:
2055 case RROTATE_EXPR:
2056 break;
2057 default:
2058 return NULL;
2061 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2062 return NULL;
2064 lhs = gimple_assign_lhs (last_stmt);
2065 oprnd0 = gimple_assign_rhs1 (last_stmt);
2066 oprnd1 = gimple_assign_rhs2 (last_stmt);
2067 if (TREE_CODE (oprnd0) != SSA_NAME
2068 || TREE_CODE (oprnd1) != SSA_NAME
2069 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2070 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2071 || TYPE_PRECISION (TREE_TYPE (lhs))
2072 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2073 return NULL;
2075 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2076 return NULL;
2078 if (dt != vect_internal_def)
2079 return NULL;
2081 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2082 *type_out = *type_in;
2083 if (*type_in == NULL_TREE)
2084 return NULL;
2086 tree def = NULL_TREE;
2087 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2088 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2090 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2091 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2092 && TYPE_PRECISION (TREE_TYPE (rhs1))
2093 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2095 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2096 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2097 def = rhs1;
2098 else
2100 tree mask
2101 = build_low_bits_mask (TREE_TYPE (rhs1),
2102 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2103 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2104 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2105 new_pattern_def_seq (stmt_vinfo, def_stmt);
2110 if (def == NULL_TREE)
2112 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2113 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2114 new_pattern_def_seq (stmt_vinfo, def_stmt);
2117 /* Pattern detected. */
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_NOTE, vect_location,
2120 "vect_recog_vector_vector_shift_pattern: detected:\n");
2122 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2123 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2124 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2126 if (dump_enabled_p ())
2127 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2129 stmts->safe_push (last_stmt);
2130 return pattern_stmt;
2133 /* Return true iff the target has a vector optab implementing the operation
2134 CODE on type VECTYPE. */
2136 static bool
2137 target_has_vecop_for_code (tree_code code, tree vectype)
2139 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2140 return voptab
2141 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2144 /* Verify that the target has optabs of VECTYPE to perform all the steps
2145 needed by the multiplication-by-immediate synthesis algorithm described by
2146 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2147 present. Return true iff the target supports all the steps. */
2149 static bool
2150 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2151 tree vectype, bool synth_shift_p)
2153 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2154 return false;
2156 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2157 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2159 if (var == negate_variant
2160 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2161 return false;
2163 /* If we must synthesize shifts with additions, make sure that vector
2164 addition is available. */
2165 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2166 return false;
2168 for (int i = 1; i < alg->ops; i++)
2170 switch (alg->op[i])
2172 case alg_shift:
2173 break;
2174 case alg_add_t_m2:
2175 case alg_add_t2_m:
2176 case alg_add_factor:
2177 if (!supports_vplus)
2178 return false;
2179 break;
2180 case alg_sub_t_m2:
2181 case alg_sub_t2_m:
2182 case alg_sub_factor:
2183 if (!supports_vminus)
2184 return false;
2185 break;
2186 case alg_unknown:
2187 case alg_m:
2188 case alg_zero:
2189 case alg_impossible:
2190 return false;
2191 default:
2192 gcc_unreachable ();
2196 return true;
2199 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2200 putting the final result in DEST. Append all statements but the last into
2201 VINFO. Return the last statement. */
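/* Editorial illustration (not part of the original source): for
   AMNT == 3 the emitted sequence is

     t1 = op + op;        -> op << 1
     t2 = t1 + t1;        -> op << 2
     dest = t2 + t2;      -> op << 3

   i.e. AMNT self-additions, each doubling the previous result; only
   the final addition is returned instead of being appended.  */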
2203 static gimple *
2204 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2205 stmt_vec_info vinfo)
2207 HOST_WIDE_INT i;
2208 tree itype = TREE_TYPE (op);
2209 tree prev_res = op;
2210 gcc_assert (amnt >= 0);
2211 for (i = 0; i < amnt; i++)
2213 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2214 : dest;
2215 gimple *stmt
2216 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2217 prev_res = tmp_var;
2218 if (i < amnt - 1)
2219 append_pattern_def_seq (vinfo, stmt);
2220 else
2221 return stmt;
2223 gcc_unreachable ();
2224 return NULL;
2227 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2228 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2229 the process if necessary. Append the resulting assignment statements
2230 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2231 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2232 left shifts using additions. */
2234 static tree
2235 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2236 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2238 if (integer_zerop (op2)
2239 && (code == LSHIFT_EXPR
2240 || code == PLUS_EXPR))
2242 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2243 return op1;
2246 gimple *stmt;
2247 tree itype = TREE_TYPE (op1);
2248 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2250 if (code == LSHIFT_EXPR
2251 && synth_shift_p)
2253 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2254 stmt_vinfo);
2255 append_pattern_def_seq (stmt_vinfo, stmt);
2256 return tmp_var;
2259 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2260 append_pattern_def_seq (stmt_vinfo, stmt);
2261 return tmp_var;
2264 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2265 and simple arithmetic operations to be vectorized. Record the statements
2266 produced in STMT_VINFO and return the last statement in the sequence or
2267 NULL if it's not possible to synthesize such a multiplication.
2268 This function mirrors the behavior of expand_mult_const in expmed.c but
2269 works on tree-ssa form. */
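/* Editorial illustration (not part of the original source): for
   val == 10 one sequence choose_mult_variant may select is

     t1 = op << 2;        -> 4 * op
     t2 = t1 + op;        -> 5 * op   (alg_add_t2_m)
     res = t2 << 1;       -> 10 * op

   The exact shape depends on the target's rtx costs, which is why the
   chosen algorithm is re-checked against the vector optabs by
   target_supports_mult_synth_alg.  */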
2271 static gimple *
2272 vect_synth_mult_by_constant (tree op, tree val,
2273 stmt_vec_info stmt_vinfo)
2275 tree itype = TREE_TYPE (op);
2276 machine_mode mode = TYPE_MODE (itype);
2277 struct algorithm alg;
2278 mult_variant variant;
2279 if (!tree_fits_shwi_p (val))
2280 return NULL;
2282 /* Multiplication synthesis by shifts, adds and subs can introduce
2283 signed overflow where the original operation didn't. Perform the
2284 operations on an unsigned type and cast back to avoid this.
2285 In the future we may want to relax this for synthesis algorithms
2286 that we can prove do not cause unexpected overflow. */
2287 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2289 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2291 /* Targets that don't support vector shifts but support vector additions
2292 can synthesize shifts that way. */
2293 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2295 HOST_WIDE_INT hwval = tree_to_shwi (val);
2296 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2297 The vectorizer's benefit analysis will decide whether it's beneficial
2298 to do this. */
2299 bool possible = choose_mult_variant (mode, hwval, &alg,
2300 &variant, MAX_COST);
2301 if (!possible)
2302 return NULL;
2304 tree vectype = get_vectype_for_scalar_type (multtype);
2306 if (!vectype
2307 || !target_supports_mult_synth_alg (&alg, variant,
2308 vectype, synth_shift_p))
2309 return NULL;
2311 tree accumulator;
2313 /* Clear out the sequence of statements so we can populate it below. */
2314 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2315 gimple *stmt = NULL;
2317 if (cast_to_unsigned_p)
2319 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2320 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2321 append_pattern_def_seq (stmt_vinfo, stmt);
2322 op = tmp_op;
2325 if (alg.op[0] == alg_zero)
2326 accumulator = build_int_cst (multtype, 0);
2327 else
2328 accumulator = op;
2330 bool needs_fixup = (variant == negate_variant)
2331 || (variant == add_variant);
2333 for (int i = 1; i < alg.ops; i++)
2335 tree shft_log = build_int_cst (multtype, alg.log[i]);
2336 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2337 tree tmp_var = NULL_TREE;
2339 switch (alg.op[i])
2341 case alg_shift:
2342 if (synth_shift_p)
2343 stmt
2344 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2345 stmt_vinfo);
2346 else
2347 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2348 shft_log);
2349 break;
2350 case alg_add_t_m2:
2351 tmp_var
2352 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2353 stmt_vinfo, synth_shift_p);
2354 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2355 tmp_var);
2356 break;
2357 case alg_sub_t_m2:
2358 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2359 shft_log, stmt_vinfo,
2360 synth_shift_p);
2361 /* In some algorithms the first step involves zeroing the
2362 accumulator. If subtracting from such an accumulator,
2363 just emit the negation directly. */
2364 if (integer_zerop (accumulator))
2365 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2366 else
2367 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2368 tmp_var);
2369 break;
2370 case alg_add_t2_m:
2371 tmp_var
2372 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2373 stmt_vinfo, synth_shift_p);
2374 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2375 break;
2376 case alg_sub_t2_m:
2377 tmp_var
2378 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2379 stmt_vinfo, synth_shift_p);
2380 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2381 break;
2382 case alg_add_factor:
2383 tmp_var
2384 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2385 stmt_vinfo, synth_shift_p);
2386 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2387 tmp_var);
2388 break;
2389 case alg_sub_factor:
2390 tmp_var
2391 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2392 stmt_vinfo, synth_shift_p);
2393 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2394 accumulator);
2395 break;
2396 default:
2397 gcc_unreachable ();
2399 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2400 but rather return it directly. */
2402 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2403 append_pattern_def_seq (stmt_vinfo, stmt);
2404 accumulator = accum_tmp;
2406 if (variant == negate_variant)
2408 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2409 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2410 accumulator = accum_tmp;
2411 if (cast_to_unsigned_p)
2412 append_pattern_def_seq (stmt_vinfo, stmt);
2414 else if (variant == add_variant)
2416 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2417 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2418 accumulator = accum_tmp;
2419 if (cast_to_unsigned_p)
2420 append_pattern_def_seq (stmt_vinfo, stmt);
2422 /* Move back to a signed type if needed. */
2423 if (cast_to_unsigned_p)
2425 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2426 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2429 return stmt;
2432 /* Detect multiplication by constant and convert it into a sequence of
2433 shifts and additions, subtractions, negations. We reuse the
2434 choose_mult_variant algorithms from expmed.c
2436 Input/Output:
2438 STMTS: Contains a stmt from which the pattern search begins,
2439 i.e. the mult stmt.
2441 Output:
2443 * TYPE_IN: The type of the input arguments to the pattern.
2445 * TYPE_OUT: The type of the output of this pattern.
2447 * Return value: A new stmt that will be used to replace
2448 the multiplication. */
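/* Editorial illustration (not part of the original source): a loop
   such as

     void
     scale (int *restrict a, int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] * 10;
     }

   on a target without a vector multiply for the chosen mode triggers
   this pattern, replacing the multiplication with the shift/add
   sequence built by vect_synth_mult_by_constant above.  */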
2450 static gimple *
2451 vect_recog_mult_pattern (vec<gimple *> *stmts,
2452 tree *type_in, tree *type_out)
2454 gimple *last_stmt = stmts->pop ();
2455 tree oprnd0, oprnd1, vectype, itype;
2456 gimple *pattern_stmt;
2457 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2459 if (!is_gimple_assign (last_stmt))
2460 return NULL;
2462 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2463 return NULL;
2465 oprnd0 = gimple_assign_rhs1 (last_stmt);
2466 oprnd1 = gimple_assign_rhs2 (last_stmt);
2467 itype = TREE_TYPE (oprnd0);
2469 if (TREE_CODE (oprnd0) != SSA_NAME
2470 || TREE_CODE (oprnd1) != INTEGER_CST
2471 || !INTEGRAL_TYPE_P (itype)
2472 || !type_has_mode_precision_p (itype))
2473 return NULL;
2475 vectype = get_vectype_for_scalar_type (itype);
2476 if (vectype == NULL_TREE)
2477 return NULL;
2479 /* If the target can handle vectorized multiplication natively,
2480 don't attempt to optimize this. */
2481 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2482 if (mul_optab != unknown_optab)
2484 machine_mode vec_mode = TYPE_MODE (vectype);
2485 int icode = (int) optab_handler (mul_optab, vec_mode);
2486 if (icode != CODE_FOR_nothing)
2487 return NULL;
2490 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2491 if (!pattern_stmt)
2492 return NULL;
2494 /* Pattern detected. */
2495 if (dump_enabled_p ())
2496 dump_printf_loc (MSG_NOTE, vect_location,
2497 "vect_recog_mult_pattern: detected:\n");
2499 if (dump_enabled_p ())
2500 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2501 pattern_stmt, 0);
2503 stmts->safe_push (last_stmt);
2504 *type_in = vectype;
2505 *type_out = vectype;
2507 return pattern_stmt;
2510 /* Detect a signed division by a constant that wouldn't be
2511 otherwise vectorized:
2513 type a_t, b_t;
2515 S1 a_t = b_t / N;
2517 where type 'type' is an integral type and N is a constant.
2519 Similarly handle modulo by a constant:
2521 S4 a_t = b_t % N;
2523 Input/Output:
2525 * STMTS: Contains a stmt from which the pattern search begins,
2526 i.e. the division stmt. S1 is replaced, if N is a power
2527 of two constant and type is signed, by:
2528 S3 y_t = b_t < 0 ? N - 1 : 0;
2529 S2 x_t = b_t + y_t;
2530 S1' a_t = x_t >> log2 (N);
2532 S4 is replaced, if N is a power of two constant and
2533 type is signed, by (where *_T temporaries have unsigned type):
2534 S9 y_T = b_t < 0 ? -1U : 0U;
2535 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2536 S7 z_t = (type) z_T;
2537 S6 w_t = b_t + z_t;
2538 S5 x_t = w_t & (N - 1);
2539 S4' a_t = x_t - z_t;
2541 Output:
2543 * TYPE_IN: The type of the input arguments to the pattern.
2545 * TYPE_OUT: The type of the output of this pattern.
2547 * Return value: A new stmt that will be used to replace the division
2548 S1 or modulo S4 stmt. */
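/* Editorial illustration (not part of the original source): for
   type == int, N == 4 and b_t == -7 the division sequence computes

     y_t = -7 < 0 ? 3 : 0;    -> 3
     x_t = -7 + 3;            -> -4
     a_t = -4 >> 2;           -> -1

   matching C's truncating division -7 / 4 == -1, whereas the plain
   arithmetic shift -7 >> 2 would give -2 (rounding towards negative
   infinity).  */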
2550 static gimple *
2551 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2552 tree *type_in, tree *type_out)
2554 gimple *last_stmt = stmts->pop ();
2555 tree oprnd0, oprnd1, vectype, itype, cond;
2556 gimple *pattern_stmt, *def_stmt;
2557 enum tree_code rhs_code;
2558 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2559 vec_info *vinfo = stmt_vinfo->vinfo;
2560 optab optab;
2561 tree q;
2562 int dummy_int, prec;
2563 stmt_vec_info def_stmt_vinfo;
2565 if (!is_gimple_assign (last_stmt))
2566 return NULL;
2568 rhs_code = gimple_assign_rhs_code (last_stmt);
2569 switch (rhs_code)
2571 case TRUNC_DIV_EXPR:
2572 case TRUNC_MOD_EXPR:
2573 break;
2574 default:
2575 return NULL;
2578 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2579 return NULL;
2581 oprnd0 = gimple_assign_rhs1 (last_stmt);
2582 oprnd1 = gimple_assign_rhs2 (last_stmt);
2583 itype = TREE_TYPE (oprnd0);
2584 if (TREE_CODE (oprnd0) != SSA_NAME
2585 || TREE_CODE (oprnd1) != INTEGER_CST
2586 || TREE_CODE (itype) != INTEGER_TYPE
2587 || !type_has_mode_precision_p (itype))
2588 return NULL;
2590 vectype = get_vectype_for_scalar_type (itype);
2591 if (vectype == NULL_TREE)
2592 return NULL;
2594 /* If the target can handle vectorized division or modulo natively,
2595 don't attempt to optimize this. */
2596 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2597 if (optab != unknown_optab)
2599 machine_mode vec_mode = TYPE_MODE (vectype);
2600 int icode = (int) optab_handler (optab, vec_mode);
2601 if (icode != CODE_FOR_nothing)
2602 return NULL;
2605 prec = TYPE_PRECISION (itype);
2606 if (integer_pow2p (oprnd1))
2608 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2609 return NULL;
2611 /* Pattern detected. */
2612 if (dump_enabled_p ())
2613 dump_printf_loc (MSG_NOTE, vect_location,
2614 "vect_recog_divmod_pattern: detected:\n");
2616 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2617 build_int_cst (itype, 0));
2618 if (rhs_code == TRUNC_DIV_EXPR)
2620 tree var = vect_recog_temp_ssa_var (itype, NULL);
2621 tree shift;
2622 def_stmt
2623 = gimple_build_assign (var, COND_EXPR, cond,
2624 fold_build2 (MINUS_EXPR, itype, oprnd1,
2625 build_int_cst (itype, 1)),
2626 build_int_cst (itype, 0));
2627 new_pattern_def_seq (stmt_vinfo, def_stmt);
2628 var = vect_recog_temp_ssa_var (itype, NULL);
2629 def_stmt
2630 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2631 gimple_assign_lhs (def_stmt));
2632 append_pattern_def_seq (stmt_vinfo, def_stmt);
2634 shift = build_int_cst (itype, tree_log2 (oprnd1));
2635 pattern_stmt
2636 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2637 RSHIFT_EXPR, var, shift);
2639 else
2641 tree signmask;
2642 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2643 if (compare_tree_int (oprnd1, 2) == 0)
2645 signmask = vect_recog_temp_ssa_var (itype, NULL);
2646 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2647 build_int_cst (itype, 1),
2648 build_int_cst (itype, 0));
2649 append_pattern_def_seq (stmt_vinfo, def_stmt);
2651 else
2653 tree utype
2654 = build_nonstandard_integer_type (prec, 1);
2655 tree vecutype = get_vectype_for_scalar_type (utype);
2656 tree shift
2657 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2658 - tree_log2 (oprnd1));
2659 tree var = vect_recog_temp_ssa_var (utype, NULL);
2661 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2662 build_int_cst (utype, -1),
2663 build_int_cst (utype, 0));
2664 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2665 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2666 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2667 append_pattern_def_seq (stmt_vinfo, def_stmt);
2668 var = vect_recog_temp_ssa_var (utype, NULL);
2669 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2670 gimple_assign_lhs (def_stmt),
2671 shift);
2672 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2673 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2674 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2675 append_pattern_def_seq (stmt_vinfo, def_stmt);
2676 signmask = vect_recog_temp_ssa_var (itype, NULL);
2677 def_stmt
2678 = gimple_build_assign (signmask, NOP_EXPR, var);
2679 append_pattern_def_seq (stmt_vinfo, def_stmt);
2681 def_stmt
2682 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2683 PLUS_EXPR, oprnd0, signmask);
2684 append_pattern_def_seq (stmt_vinfo, def_stmt);
2685 def_stmt
2686 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2687 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2688 fold_build2 (MINUS_EXPR, itype, oprnd1,
2689 build_int_cst (itype, 1)));
2690 append_pattern_def_seq (stmt_vinfo, def_stmt);
2692 pattern_stmt
2693 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2694 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2695 signmask);
2698 if (dump_enabled_p ())
2699 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2700 0);
2702 stmts->safe_push (last_stmt);
2704 *type_in = vectype;
2705 *type_out = vectype;
2706 return pattern_stmt;
2709 if (prec > HOST_BITS_PER_WIDE_INT
2710 || integer_zerop (oprnd1))
2711 return NULL;
2713 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2714 return NULL;
2716 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2718 if (TYPE_UNSIGNED (itype))
2720 unsigned HOST_WIDE_INT mh, ml;
2721 int pre_shift, post_shift;
2722 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2723 & GET_MODE_MASK (TYPE_MODE (itype)));
2724 tree t1, t2, t3, t4;
2726 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2727 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2728 return NULL;
2730 /* Find a suitable multiplier and right shift count
2731 instead of multiplying with D. */
2732 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2734 /* If the suggested multiplier is more than SIZE bits, we can do better
2735 for even divisors, using an initial right shift. */
2736 if (mh != 0 && (d & 1) == 0)
2738 pre_shift = ctz_or_zero (d);
2739 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2740 &ml, &post_shift, &dummy_int);
2741 gcc_assert (!mh);
2743 else
2744 pre_shift = 0;
2746 if (mh != 0)
2748 if (post_shift - 1 >= prec)
2749 return NULL;
2751 /* t1 = oprnd0 h* ml;
2752 t2 = oprnd0 - t1;
2753 t3 = t2 >> 1;
2754 t4 = t1 + t3;
2755 q = t4 >> (post_shift - 1); */
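/* Editorial illustration (not part of the original source): for a
   32-bit unsigned division by d == 7, choose_multiplier is expected
   to yield ml == 0x24924925 and post_shift == 3, so oprnd0 == 100
   gives

     t1 = (100 * 0x24924925) >> 32;   -> 14
     t2 = 100 - 14;                   -> 86
     t3 = 86 >> 1;                    -> 43
     t4 = 14 + 43;                    -> 57
     q  = 57 >> 2;                    -> 14 == 100 / 7  */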
2756 t1 = vect_recog_temp_ssa_var (itype, NULL);
2757 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2758 build_int_cst (itype, ml));
2759 append_pattern_def_seq (stmt_vinfo, def_stmt);
2761 t2 = vect_recog_temp_ssa_var (itype, NULL);
2762 def_stmt
2763 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2764 append_pattern_def_seq (stmt_vinfo, def_stmt);
2766 t3 = vect_recog_temp_ssa_var (itype, NULL);
2767 def_stmt
2768 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2769 append_pattern_def_seq (stmt_vinfo, def_stmt);
2771 t4 = vect_recog_temp_ssa_var (itype, NULL);
2772 def_stmt
2773 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2775 if (post_shift != 1)
2777 append_pattern_def_seq (stmt_vinfo, def_stmt);
2779 q = vect_recog_temp_ssa_var (itype, NULL);
2780 pattern_stmt
2781 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2782 build_int_cst (itype, post_shift - 1));
2784 else
2786 q = t4;
2787 pattern_stmt = def_stmt;
2790 else
2792 if (pre_shift >= prec || post_shift >= prec)
2793 return NULL;
2795 /* t1 = oprnd0 >> pre_shift;
2796 t2 = t1 h* ml;
2797 q = t2 >> post_shift; */
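/* Editorial illustration (not part of the original source): for a
   32-bit unsigned division by d == 12, pre_shift == 2 reduces the
   problem to division by 3, for which ml == 0xAAAAAAAB and
   post_shift == 1 are the expected values:

     t1 = oprnd0 >> 2;
     t2 = (t1 * 0xAAAAAAAB) >> 32;
     q  = t2 >> 1;

   e.g. oprnd0 == 100 gives t1 == 25, t2 == 16, q == 8 == 100 / 12.  */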
2798 if (pre_shift)
2800 t1 = vect_recog_temp_ssa_var (itype, NULL);
2801 def_stmt
2802 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2803 build_int_cst (NULL, pre_shift));
2804 append_pattern_def_seq (stmt_vinfo, def_stmt);
2806 else
2807 t1 = oprnd0;
2809 t2 = vect_recog_temp_ssa_var (itype, NULL);
2810 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2811 build_int_cst (itype, ml));
2813 if (post_shift)
2815 append_pattern_def_seq (stmt_vinfo, def_stmt);
2817 q = vect_recog_temp_ssa_var (itype, NULL);
2818 def_stmt
2819 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2820 build_int_cst (itype, post_shift));
2822 else
2823 q = t2;
2825 pattern_stmt = def_stmt;
2828 else
2830 unsigned HOST_WIDE_INT ml;
2831 int post_shift;
2832 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2833 unsigned HOST_WIDE_INT abs_d;
2834 bool add = false;
2835 tree t1, t2, t3, t4;
2837 /* Give up for -1. */
2838 if (d == -1)
2839 return NULL;
2841 /* Since d might be INT_MIN, we have to cast to
2842 unsigned HOST_WIDE_INT before negating to avoid
2843 undefined signed overflow. */
2844 abs_d = (d >= 0
2845 ? (unsigned HOST_WIDE_INT) d
2846 : - (unsigned HOST_WIDE_INT) d);
2848 /* n rem d = n rem -d */
2849 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2851 d = abs_d;
2852 oprnd1 = build_int_cst (itype, abs_d);
2854 else if (HOST_BITS_PER_WIDE_INT >= prec
2855 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2856 /* This case is not handled correctly below. */
2857 return NULL;
2859 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2860 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2862 add = true;
2863 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2865 if (post_shift >= prec)
2866 return NULL;
2868 /* t1 = oprnd0 h* ml; */
2869 t1 = vect_recog_temp_ssa_var (itype, NULL);
2870 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2871 build_int_cst (itype, ml));
2873 if (add)
2875 /* t2 = t1 + oprnd0; */
2876 append_pattern_def_seq (stmt_vinfo, def_stmt);
2877 t2 = vect_recog_temp_ssa_var (itype, NULL);
2878 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2880 else
2881 t2 = t1;
2883 if (post_shift)
2885 /* t3 = t2 >> post_shift; */
2886 append_pattern_def_seq (stmt_vinfo, def_stmt);
2887 t3 = vect_recog_temp_ssa_var (itype, NULL);
2888 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2889 build_int_cst (itype, post_shift));
2891 else
2892 t3 = t2;
2894 wide_int oprnd0_min, oprnd0_max;
2895 int msb = 1;
2896 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2898 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2899 msb = 0;
2900 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2901 msb = -1;
2904 if (msb == 0 && d >= 0)
2906 /* q = t3; */
2907 q = t3;
2908 pattern_stmt = def_stmt;
2910 else
2912 /* t4 = oprnd0 >> (prec - 1);
2913 or if we know from VRP that oprnd0 >= 0
2914 t4 = 0;
2915 or if we know from VRP that oprnd0 < 0
2916 t4 = -1; */
2917 append_pattern_def_seq (stmt_vinfo, def_stmt);
2918 t4 = vect_recog_temp_ssa_var (itype, NULL);
2919 if (msb != 1)
2920 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2921 build_int_cst (itype, msb));
2922 else
2923 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2924 build_int_cst (itype, prec - 1));
2925 append_pattern_def_seq (stmt_vinfo, def_stmt);
2927 /* q = t3 - t4; or q = t4 - t3; */
2928 q = vect_recog_temp_ssa_var (itype, NULL);
2929 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2930 d < 0 ? t3 : t4);
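/* Editorial illustration (not part of the original source): for a
   32-bit signed division by d == 3, the expected values are
   ml == 0x55555556 and post_shift == 0, so oprnd0 == -7 gives

     t1 = (-7 * 0x55555556) >> 32;    -> -3
     t3 = t1;                            (post_shift == 0, no add)
     t4 = -7 >> 31;                   -> -1
     q  = t3 - t4;                    -> -2 == -7 / 3 in C  */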
2934 if (rhs_code == TRUNC_MOD_EXPR)
2936 tree r, t1;
2938 /* We divided. Now finish by:
2939 t1 = q * oprnd1;
2940 r = oprnd0 - t1; */
2941 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2943 t1 = vect_recog_temp_ssa_var (itype, NULL);
2944 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2945 append_pattern_def_seq (stmt_vinfo, def_stmt);
2947 r = vect_recog_temp_ssa_var (itype, NULL);
2948 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2951 /* Pattern detected. */
2952 if (dump_enabled_p ())
2954 dump_printf_loc (MSG_NOTE, vect_location,
2955 "vect_recog_divmod_pattern: detected: ");
2956 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2959 stmts->safe_push (last_stmt);
2961 *type_in = vectype;
2962 *type_out = vectype;
2963 return pattern_stmt;
2966 /* Function vect_recog_mixed_size_cond_pattern
2968 Try to find the following pattern:
2970 type x_t, y_t;
2971 TYPE a_T, b_T, c_T;
2972 loop:
2973 S1 a_T = x_t CMP y_t ? b_T : c_T;
2975 where type 'TYPE' is an integral type which has a different size
2976 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2977 than 'type', the constants need to fit into an integer type
2978 with the same width as 'type') or results of conversion from 'type'.
2980 Input:
2982 * LAST_STMT: A stmt from which the pattern search begins.
2984 Output:
2986 * TYPE_IN: The type of the input arguments to the pattern.
2988 * TYPE_OUT: The type of the output of this pattern.
2990 * Return value: A new stmt that will be used to replace the pattern.
2991 Additionally a def_stmt is added.
2993 a_it = x_t CMP y_t ? b_it : c_it;
2994 a_T = (TYPE) a_it; */
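/* Editorial illustration (not part of the original source): for

     short x[N], y[N];
     int a[N];
     ...
     a[i] = x[i] < y[i] ? 17 : 42;

   the comparison works on 16-bit lanes while the result is 32-bit.
   The pattern first builds a 16-bit COND_EXPR (both constants fit in
   a 16-bit integer type) and then widens its result, so the compare
   and the select operate on vectors with the same number of
   elements.  */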
2996 static gimple *
2997 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2998 tree *type_out)
3000 gimple *last_stmt = (*stmts)[0];
3001 tree cond_expr, then_clause, else_clause;
3002 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3003 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3004 gimple *pattern_stmt, *def_stmt;
3005 vec_info *vinfo = stmt_vinfo->vinfo;
3006 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3007 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3008 bool promotion;
3009 tree comp_scalar_type;
3011 if (!is_gimple_assign (last_stmt)
3012 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3013 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3014 return NULL;
3016 cond_expr = gimple_assign_rhs1 (last_stmt);
3017 then_clause = gimple_assign_rhs2 (last_stmt);
3018 else_clause = gimple_assign_rhs3 (last_stmt);
3020 if (!COMPARISON_CLASS_P (cond_expr))
3021 return NULL;
3023 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3024 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3025 if (comp_vectype == NULL_TREE)
3026 return NULL;
3028 type = gimple_expr_type (last_stmt);
3029 if (types_compatible_p (type, comp_scalar_type)
3030 || ((TREE_CODE (then_clause) != INTEGER_CST
3031 || TREE_CODE (else_clause) != INTEGER_CST)
3032 && !INTEGRAL_TYPE_P (comp_scalar_type))
3033 || !INTEGRAL_TYPE_P (type))
3034 return NULL;
3036 if ((TREE_CODE (then_clause) != INTEGER_CST
3037 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3038 &def_stmt0, &promotion))
3039 || (TREE_CODE (else_clause) != INTEGER_CST
3040 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3041 &def_stmt1, &promotion)))
3042 return NULL;
3044 if (orig_type0 && orig_type1
3045 && !types_compatible_p (orig_type0, orig_type1))
3046 return NULL;
3048 if (orig_type0)
3050 if (!types_compatible_p (orig_type0, comp_scalar_type))
3051 return NULL;
3052 then_clause = gimple_assign_rhs1 (def_stmt0);
3053 itype = orig_type0;
3056 if (orig_type1)
3058 if (!types_compatible_p (orig_type1, comp_scalar_type))
3059 return NULL;
3060 else_clause = gimple_assign_rhs1 (def_stmt1);
3061 itype = orig_type1;
3065 HOST_WIDE_INT cmp_mode_size
3066 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3068 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
3069 return NULL;
3071 vectype = get_vectype_for_scalar_type (type);
3072 if (vectype == NULL_TREE)
3073 return NULL;
3075 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3076 return NULL;
3078 if (itype == NULL_TREE)
3079 itype = build_nonstandard_integer_type (cmp_mode_size,
3080 TYPE_UNSIGNED (type));
3082 if (itype == NULL_TREE
3083 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
3084 return NULL;
3086 vecitype = get_vectype_for_scalar_type (itype);
3087 if (vecitype == NULL_TREE)
3088 return NULL;
3090 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3091 return NULL;
3093 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
3095 if ((TREE_CODE (then_clause) == INTEGER_CST
3096 && !int_fits_type_p (then_clause, itype))
3097 || (TREE_CODE (else_clause) == INTEGER_CST
3098 && !int_fits_type_p (else_clause, itype)))
3099 return NULL;
3102 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3103 COND_EXPR, unshare_expr (cond_expr),
3104 fold_convert (itype, then_clause),
3105 fold_convert (itype, else_clause));
3106 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3107 NOP_EXPR, gimple_assign_lhs (def_stmt));
3109 new_pattern_def_seq (stmt_vinfo, def_stmt);
3110 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3111 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3112 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3113 *type_in = vecitype;
3114 *type_out = vectype;
3116 if (dump_enabled_p ())
3117 dump_printf_loc (MSG_NOTE, vect_location,
3118 "vect_recog_mixed_size_cond_pattern: detected:\n");
3120 return pattern_stmt;
3124 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
3125 true if bool VAR can and should be optimized that way. Assume it shouldn't
3126 in case it's a result of a comparison which can be directly vectorized into
3127 a vector comparison. Fills in STMTS with all stmts visited during the
3128 walk. */
3130 static bool
3131 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3133 gimple *def_stmt;
3134 enum vect_def_type dt;
3135 tree rhs1;
3136 enum tree_code rhs_code;
3138 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3139 return false;
3141 if (dt != vect_internal_def)
3142 return false;
3144 if (!is_gimple_assign (def_stmt))
3145 return false;
3147 if (stmts.contains (def_stmt))
3148 return true;
3150 rhs1 = gimple_assign_rhs1 (def_stmt);
3151 rhs_code = gimple_assign_rhs_code (def_stmt);
3152 switch (rhs_code)
3154 case SSA_NAME:
3155 if (! check_bool_pattern (rhs1, vinfo, stmts))
3156 return false;
3157 break;
3159 CASE_CONVERT:
3160 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3161 return false;
3162 if (! check_bool_pattern (rhs1, vinfo, stmts))
3163 return false;
3164 break;
3166 case BIT_NOT_EXPR:
3167 if (! check_bool_pattern (rhs1, vinfo, stmts))
3168 return false;
3169 break;
3171 case BIT_AND_EXPR:
3172 case BIT_IOR_EXPR:
3173 case BIT_XOR_EXPR:
3174 if (! check_bool_pattern (rhs1, vinfo, stmts)
3175 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3176 return false;
3177 break;
3179 default:
3180 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3182 tree vecitype, comp_vectype;
3184 /* If the comparison can throw, then is_gimple_condexpr will be
3185 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3186 if (stmt_could_throw_p (def_stmt))
3187 return false;
3189 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3190 if (comp_vectype == NULL_TREE)
3191 return false;
3193 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3194 if (mask_type
3195 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3196 return false;
3198 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3200 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3201 tree itype
3202 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3203 vecitype = get_vectype_for_scalar_type (itype);
3204 if (vecitype == NULL_TREE)
3205 return false;
3207 else
3208 vecitype = comp_vectype;
3209 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3210 return false;
3212 else
3213 return false;
3214 break;
3217 bool res = stmts.add (def_stmt);
3218 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3219 gcc_assert (!res);
3221 return true;
3225 /* Helper function of adjust_bool_pattern. Cast the result of a previous
3226 stmt (SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast stmt to
3227 STMT_INFO's pattern sequence, and return the new SSA name. */
3229 static tree
3230 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3232 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3233 NOP_EXPR, var);
3234 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3235 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3236 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3237 append_pattern_def_seq (stmt_info, cast_stmt);
3238 return gimple_assign_lhs (cast_stmt);
3241 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3242 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3243 type, OUT_TYPE is the desired final integer type of the whole pattern.
3244 STMT_INFO is the info of the pattern root, with which the pattern stmts
3245 should be associated. DEFS is a map of pattern defs.
3247 static void
3248 adjust_bool_pattern (tree var, tree out_type,
3249 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3251 gimple *stmt = SSA_NAME_DEF_STMT (var);
3252 enum tree_code rhs_code, def_rhs_code;
3253 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3254 location_t loc;
3255 gimple *pattern_stmt, *def_stmt;
3256 tree trueval = NULL_TREE;
3258 rhs1 = gimple_assign_rhs1 (stmt);
3259 rhs2 = gimple_assign_rhs2 (stmt);
3260 rhs_code = gimple_assign_rhs_code (stmt);
3261 loc = gimple_location (stmt);
3262 switch (rhs_code)
3264 case SSA_NAME:
3265 CASE_CONVERT:
3266 irhs1 = *defs.get (rhs1);
3267 itype = TREE_TYPE (irhs1);
3268 pattern_stmt
3269 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3270 SSA_NAME, irhs1);
3271 break;
3273 case BIT_NOT_EXPR:
3274 irhs1 = *defs.get (rhs1);
3275 itype = TREE_TYPE (irhs1);
3276 pattern_stmt
3277 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3278 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3279 break;
3281 case BIT_AND_EXPR:
3282 /* Try to optimize x = y & (a < b ? 1 : 0); into
3283 x = (a < b ? y : 0);
3285 E.g. for:
3286 bool a_b, b_b, c_b;
3287 TYPE d_T;
3289 S1 a_b = x1 CMP1 y1;
3290 S2 b_b = x2 CMP2 y2;
3291 S3 c_b = a_b & b_b;
3292 S4 d_T = (TYPE) c_b;
3294 we would normally emit:
3296 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3297 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3298 S3' c_T = a_T & b_T;
3299 S4' d_T = c_T;
3301 but we can save one stmt by using the
3302 result of one of the COND_EXPRs in the other COND_EXPR and leave
3303 BIT_AND_EXPR stmt out:
3305 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3306 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3307 S4' f_T = c_T;
3309 At least when VEC_COND_EXPR is implemented using masks,
3310 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases the
3311 comparison mask is computed and ANDed, in one case with an
3312 all-ones vector, in the other case with a vector register.
3313 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3314 often more expensive. */
3315 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3316 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3317 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3319 irhs1 = *defs.get (rhs1);
3320 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3321 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3322 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3324 rhs_code = def_rhs_code;
3325 rhs1 = def_rhs1;
3326 rhs2 = gimple_assign_rhs2 (def_stmt);
3327 trueval = irhs1;
3328 goto do_compare;
3330 else
3331 irhs2 = *defs.get (rhs2);
3332 goto and_ior_xor;
3334 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3335 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3336 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3338 irhs2 = *defs.get (rhs2);
3339 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3340 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3341 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3343 rhs_code = def_rhs_code;
3344 rhs1 = def_rhs1;
3345 rhs2 = gimple_assign_rhs2 (def_stmt);
3346 trueval = irhs2;
3347 goto do_compare;
3349 else
3350 irhs1 = *defs.get (rhs1);
3351 goto and_ior_xor;
3353 /* FALLTHRU */
3354 case BIT_IOR_EXPR:
3355 case BIT_XOR_EXPR:
3356 irhs1 = *defs.get (rhs1);
3357 irhs2 = *defs.get (rhs2);
3358 and_ior_xor:
3359 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3360 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3362 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3363 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3364 int out_prec = TYPE_PRECISION (out_type);
3365 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3366 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3367 stmt_info);
3368 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3369 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3370 stmt_info);
3371 else
3373 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3374 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3377 itype = TREE_TYPE (irhs1);
3378 pattern_stmt
3379 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3380 rhs_code, irhs1, irhs2);
3381 break;
3383 default:
3384 do_compare:
3385 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3386 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3387 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3388 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3389 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3391 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3392 itype
3393 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3395 else
3396 itype = TREE_TYPE (rhs1);
3397 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3398 if (trueval == NULL_TREE)
3399 trueval = build_int_cst (itype, 1);
3400 else
3401 gcc_checking_assert (useless_type_conversion_p (itype,
3402 TREE_TYPE (trueval)));
3403 pattern_stmt
3404 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3405 COND_EXPR, cond_expr, trueval,
3406 build_int_cst (itype, 0));
3407 break;
3410 gimple_set_location (pattern_stmt, loc);
3411 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3412 pattern def seq stmts instead of just letting auto-detection do
3413 its work? */
3414 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3415 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3416 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3417 append_pattern_def_seq (stmt_info, pattern_stmt);
3418 defs.put (var, gimple_assign_lhs (pattern_stmt));
3421 /* Comparison function to qsort a vector of gimple stmts by UID. */
3423 static int
3424 sort_after_uid (const void *p1, const void *p2)
3426 const gimple *stmt1 = *(const gimple * const *)p1;
3427 const gimple *stmt2 = *(const gimple * const *)p2;
3428 return gimple_uid (stmt1) - gimple_uid (stmt2);
3431 /* Create pattern stmts for all stmts participating in the bool pattern
3432 specified by BOOL_STMT_SET and its root STMT with the desired type
3433 OUT_TYPE. Return the def of the pattern root. */
3435 static tree
3436 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3437 tree out_type, gimple *stmt)
3439 /* Gather original stmts in the bool pattern in their order of appearance
3440 in the IL. */
3441 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3442 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3443 i != bool_stmt_set.end (); ++i)
3444 bool_stmts.quick_push (*i);
3445 bool_stmts.qsort (sort_after_uid);
3447 /* Now process them in that order, producing pattern stmts. */
3448 hash_map <tree, tree> defs;
3449 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3450 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3451 out_type, vinfo_for_stmt (stmt), defs);
3453 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3454 gimple *pattern_stmt
3455 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3456 return gimple_assign_lhs (pattern_stmt);
3459 /* Helper for search_type_for_mask. */
3461 static tree
3462 search_type_for_mask_1 (tree var, vec_info *vinfo,
3463 hash_map<gimple *, tree> &cache)
3465 gimple *def_stmt;
3466 enum vect_def_type dt;
3467 tree rhs1;
3468 enum tree_code rhs_code;
3469 tree res = NULL_TREE, res2;
3471 if (TREE_CODE (var) != SSA_NAME)
3472 return NULL_TREE;
3474 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3475 return NULL_TREE;
3477 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3478 return NULL_TREE;
3480 if (dt != vect_internal_def)
3481 return NULL_TREE;
3483 if (!is_gimple_assign (def_stmt))
3484 return NULL_TREE;
3486 tree *c = cache.get (def_stmt);
3487 if (c)
3488 return *c;
3490 rhs_code = gimple_assign_rhs_code (def_stmt);
3491 rhs1 = gimple_assign_rhs1 (def_stmt);
3493 switch (rhs_code)
3495 case SSA_NAME:
3496 case BIT_NOT_EXPR:
3497 CASE_CONVERT:
3498 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3499 break;
3501 case BIT_AND_EXPR:
3502 case BIT_IOR_EXPR:
3503 case BIT_XOR_EXPR:
3504 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3505 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3506 cache);
3507 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3508 res = res2;
3509 break;
3511 default:
3512 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3514 tree comp_vectype, mask_type;
3516 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3518 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3519 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3520 vinfo, cache);
3521 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3522 res = res2;
3523 break;
3526 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3527 if (comp_vectype == NULL_TREE)
3529 res = NULL_TREE;
3530 break;
3533 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3534 if (!mask_type
3535 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3537 res = NULL_TREE;
3538 break;
3541 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3542 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3544 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3545 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3547 else
3548 res = TREE_TYPE (rhs1);
3552 cache.put (def_stmt, res);
3553 return res;
3556 /* Return the proper type for converting bool VAR into
3557 an integer value or NULL_TREE if no such type exists.
3558 The type is chosen so that the converted value has the
3559 same number of elements as VAR's vector type. */
3561 static tree
3562 search_type_for_mask (tree var, vec_info *vinfo)
3564 hash_map<gimple *, tree> cache;
3565 return search_type_for_mask_1 (var, vinfo, cache);
3568 /* Function vect_recog_bool_pattern
3570 Try to find pattern like following:
3572 bool a_b, b_b, c_b, d_b, e_b;
3573 TYPE f_T;
3574 loop:
3575 S1 a_b = x1 CMP1 y1;
3576 S2 b_b = x2 CMP2 y2;
3577 S3 c_b = a_b & b_b;
3578 S4 d_b = x3 CMP3 y3;
3579 S5 e_b = c_b | d_b;
3580 S6 f_T = (TYPE) e_b;
3582 where type 'TYPE' is an integral type. Or a similar pattern
3583 ending in
3585 S6 f_Y = e_b ? r_Y : s_Y;
3587 as results from if-conversion of a complex condition.
3589 Input:
3591 * LAST_STMT: A stmt at the end from which the pattern
3592 search begins, i.e. cast of a bool to
3593 an integer type.
3595 Output:
3597 * TYPE_IN: The type of the input arguments to the pattern.
3599 * TYPE_OUT: The type of the output of this pattern.
3601 * Return value: A new stmt that will be used to replace the pattern.
3603 Assuming size of TYPE is the same as size of all comparisons
3604 (otherwise some casts would be added where needed), for the above
3605 sequence we create the related pattern stmts:
3606 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3607 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3608 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3609 S5' e_T = c_T | d_T;
3610 S6' f_T = e_T;
3612 Instead of the above S3' we could emit:
3613 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3614 S3' c_T = a_T | b_T;
3615 but the above is more efficient. */
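/* Editorial illustration (not part of the original source): the
   pattern typically fires for if-converted code such as

     int out[N], x1[N], y1[N], x2[N], y2[N];
     ...
     out[i] = (x1[i] < y1[i] && x2[i] < y2[i]) ? 7 : 0;

   where the boolean conjunction feeding the final statement is
   rewritten into integer COND_EXPRs of the appropriate width, as
   shown above.  */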
3617 static gimple *
3618 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3619 tree *type_out)
3621 gimple *last_stmt = stmts->pop ();
3622 enum tree_code rhs_code;
3623 tree var, lhs, rhs, vectype;
3624 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3625 stmt_vec_info new_stmt_info;
3626 vec_info *vinfo = stmt_vinfo->vinfo;
3627 gimple *pattern_stmt;
3629 if (!is_gimple_assign (last_stmt))
3630 return NULL;
3632 var = gimple_assign_rhs1 (last_stmt);
3633 lhs = gimple_assign_lhs (last_stmt);
3635 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3636 return NULL;
3638 hash_set<gimple *> bool_stmts;
3640 rhs_code = gimple_assign_rhs_code (last_stmt);
3641 if (CONVERT_EXPR_CODE_P (rhs_code))
3643 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3644 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3645 return NULL;
3646 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3647 if (vectype == NULL_TREE)
3648 return NULL;
3650 if (check_bool_pattern (var, vinfo, bool_stmts))
3652 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3653 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3654 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3655 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3656 else
3657 pattern_stmt
3658 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3660 else
3662 tree type = search_type_for_mask (var, vinfo);
3663 tree cst0, cst1, tmp;
3665 if (!type)
3666 return NULL;
3668 /* We may directly use the cond with a narrowed type, avoiding
3669 multiple cond exprs with subsequent result packing, and
3670 perform a single cond with a packed mask instead. In the case
3671 of widening it is better to emit the cond first and then
3672 extract the results. */
3673 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3674 type = TREE_TYPE (lhs);
3676 cst0 = build_int_cst (type, 0);
3677 cst1 = build_int_cst (type, 1);
3678 tmp = vect_recog_temp_ssa_var (type, NULL);
3679 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3681 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3683 tree new_vectype = get_vectype_for_scalar_type (type);
3684 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3685 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3686 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3687 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3689 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3690 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3694 *type_out = vectype;
3695 *type_in = vectype;
3696 stmts->safe_push (last_stmt);
3697 if (dump_enabled_p ())
3698 dump_printf_loc (MSG_NOTE, vect_location,
3699 "vect_recog_bool_pattern: detected:\n");
3701 return pattern_stmt;
3703 else if (rhs_code == COND_EXPR
3704 && TREE_CODE (var) == SSA_NAME)
3706 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3707 if (vectype == NULL_TREE)
3708 return NULL;
3710 /* Build a scalar type for the boolean result that when
3711 vectorized matches the vector type of the result in
3712 size and number of elements. */
3713 unsigned prec
3714 = wi::udiv_trunc (TYPE_SIZE (vectype),
3715 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3716 tree type
3717 = build_nonstandard_integer_type (prec,
3718 TYPE_UNSIGNED (TREE_TYPE (var)));
3719 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3720 return NULL;
3722 if (!check_bool_pattern (var, vinfo, bool_stmts))
3723 return NULL;
3725 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3727 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3728 pattern_stmt
3729 = gimple_build_assign (lhs, COND_EXPR,
3730 build2 (NE_EXPR, boolean_type_node,
3731 rhs, build_int_cst (type, 0)),
3732 gimple_assign_rhs2 (last_stmt),
3733 gimple_assign_rhs3 (last_stmt));
3734 *type_out = vectype;
3735 *type_in = vectype;
3736 stmts->safe_push (last_stmt);
3737 if (dump_enabled_p ())
3738 dump_printf_loc (MSG_NOTE, vect_location,
3739 "vect_recog_bool_pattern: detected:\n");
3741 return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
        rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
      else
        {
          tree type = search_type_for_mask (var, vinfo);
          tree cst0, cst1, new_vectype;

          if (!type)
            return NULL;

          if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
            type = TREE_TYPE (vectype);

          cst0 = build_int_cst (type, 0);
          cst1 = build_int_cst (type, 1);
          new_vectype = get_vectype_for_scalar_type (type);

          rhs = vect_recog_temp_ssa_var (type, NULL);
          pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);

          pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
          set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
          STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
          append_pattern_def_seq (stmt_vinfo, pattern_stmt);
        }

      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
          append_pattern_def_seq (stmt_vinfo, cast_stmt);
          rhs = rhs2;
        }
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
        = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
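/* Note for the third case handled above (rhs_code == SSA_NAME with a
   data reference), a sketch of the transform with invented names:

     S1  b_1 = x_5 < y_6;
     S2  A[i] = b_1;          <-- bool stored to memory

   is rewritten so that the stored value is computed as 0 or 1 in the
   element type of A's vector type:

     S1' tmp_2 = x_5 < y_6 ? 1 : 0;
     S2' A[i] = tmp_2;

   letting the store use a normally laid out vector type.  */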
/* A helper for vect_recog_mask_conversion_pattern.  Build a
   conversion of MASK to a type suitable for masking VECTYPE.
   The built statement gets the required vectype and is appended
   to STMT_VINFO's pattern definition sequence.

   Return the converted mask.  */

static tree
build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
                       vec_info *vinfo)
{
  gimple *stmt;
  tree masktype, tmp;
  stmt_vec_info new_stmt_info;

  masktype = build_same_sized_truth_vector_type (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  new_stmt_info = new_stmt_vec_info (stmt, vinfo);
  set_vinfo_for_stmt (stmt, new_stmt_info);
  STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
  append_pattern_def_seq (stmt_vinfo, stmt);

  return tmp;
}
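/* An illustrative sketch, with invented names and assuming a
   hypothetical target whose masks are boolean vectors: for MASK m_1
   with mask type <8 x signed-boolean:32> and VECTYPE <8 x short int>,
   the helper emits the pattern definition statement

     tmp_2 = (signed-boolean:16) m_1;

   with STMT_VINFO_VECTYPE set to <8 x signed-boolean:16>, and
   returns tmp_2.  */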
/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   bool m_1, m_2, m_3;
   int i_4, i_5;
   double d_6, d_7;
   char c_1, c_2, c_3;

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;

   A similar conversion is applied to the mask argument of
   IFN_MASK_LOAD and IFN_MASK_STORE calls when its number of
   elements does not match that of the loaded or stored vector.  */
static gimple *
vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
                                    tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  stmt_vec_info pattern_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt)
      && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
          || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
    {
      bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);

      if (load)
        {
          lhs = gimple_call_lhs (last_stmt);
          vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
        }
      else
        {
          rhs2 = gimple_call_arg (last_stmt, 3);
          vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
        }

      rhs1 = gimple_call_arg (last_stmt, 2);
      rhs1_type = search_type_for_mask (rhs1, vinfo);
      if (!rhs1_type)
        return NULL;
      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2
          || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
        return NULL;

      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

      if (load)
        {
          lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          pattern_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3,
                                          gimple_call_arg (last_stmt, 0),
                                          gimple_call_arg (last_stmt, 1),
                                          tmp);
          gimple_call_set_lhs (pattern_stmt, lhs);
        }
      else
        pattern_stmt
          = gimple_build_call_internal (IFN_MASK_STORE, 4,
                                        gimple_call_arg (last_stmt, 0),
                                        gimple_call_arg (last_stmt, 1),
                                        tmp,
                                        gimple_call_arg (last_stmt, 3));

      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
        = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;

      *type_out = vectype1;
      *type_in = vectype1;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_mask_conversion_pattern: detected:\n");

      return pattern_stmt;
    }
  if (!is_gimple_assign (last_stmt))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      /* vect_recog_mixed_size_cond_pattern could apply.
         Do nothing then.  */
      if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
        return NULL;

      vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
        {
          rhs1_type = search_type_for_mask (rhs1, vinfo);
          if (!rhs1_type)
            return NULL;
        }
      else if (COMPARISON_CLASS_P (rhs1))
        rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
      else
        return NULL;

      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2
          || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
        return NULL;

      /* If rhs1 is a comparison we need to move it into a
         separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
        {
          tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
          pattern_stmt = gimple_build_assign (tmp, rhs1);
          rhs1 = tmp;

          pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
          set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
          STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
          append_pattern_def_seq (stmt_vinfo, pattern_stmt);
        }

      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
                                          gimple_assign_rhs2 (last_stmt),
                                          gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      *type_in = vectype1;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_mask_conversion_pattern: detected:\n");

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion of
     one of the operands.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = search_type_for_mask (rhs1, vinfo);
  rhs2_type = search_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (rhs1_type);
      if (!vectype1)
        return NULL;
      rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (rhs2_type);
      if (!vectype1)
        return NULL;
      rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  *type_in = vectype1;
  stmts->safe_push (last_stmt);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mask_conversion_pattern: detected:\n");

  return pattern_stmt;
}
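/* A sketch of the IFN_MASK_LOAD case above, with invented names: in

     vect__1 = MASK_LOAD (addr_2, align, m_3);

   where the loaded elements are shorts but m_3 was defined by an int
   comparison, the mask is first converted,

     m_3' = (mask type matching the short elements) m_3;
     vect__1' = MASK_LOAD (addr_2, align, m_3');

   so that the number of mask elements matches the number of loaded
   vector elements.  */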
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  vec_info *vinfo = orig_stmt_info->vinfo;
  gimple *def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
           !gsi_end_p (si); gsi_next (&si))
        {
          def_stmt = gsi_stmt (si);
          def_stmt_info = vinfo_for_stmt (def_stmt);
          if (def_stmt_info == NULL)
            {
              def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_info);
            }
          gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
          STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
          STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
          if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
            STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
        }
    }
}
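/* After vect_mark_pattern_stmts the two statements are linked
   roughly as follows (see also the diagrams in the comment for
   vect_pattern_recog below):

     orig_stmt:     STMT_VINFO_IN_PATTERN_P  = true
                    STMT_VINFO_RELATED_STMT  = pattern_stmt
     pattern_stmt:  STMT_VINFO_RELATED_STMT  = orig_stmt
                    STMT_VINFO_VECTYPE       = pattern_vectype  */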
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the
   documentation for vect_pattern_recog.  */
static bool
vect_pattern_recog_1 (vect_recog_func *recog_func,
                      gimple_stmt_iterator si,
                      vec<gimple *> *stmts_to_replace)
{
  gimple *stmt = gsi_stmt (si), *pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple *next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return false;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_BOOLEAN_TYPE_P (type_in)
      || VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return false;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return false;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return false;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "%s pattern recognized: ", recog_func->name);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
              && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }

  return true;
}
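/* For instance (a sketch): if a recognition function returns a
   widen-mult pattern stmt such as prod_T = x_t w* y_t together with
   scalar TYPE_IN/TYPE_OUT, the code above maps TYPE_IN to a vector
   type, looks up the optab for the stmt's WIDEN_MULT_EXPR code on
   it, and accepts the pattern only if the target provides an
   instruction whose result mode matches the vector form of
   TYPE_OUT.  */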
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the stmt_vec_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (vec_info *vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  auto_vec<gimple *, 1> stmts_to_replace;
  gimple *stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_pattern_recog ===\n");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
         functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              /* Scan over all generic vect_recog_xxx_pattern functions.  */
              for (j = 0; j < NUM_PATTERNS; j++)
                if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
                                          &stmts_to_replace))
                  break;
            }
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
           gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
        {
          if ((stmt = gsi_stmt (si))
              && vinfo_for_stmt (stmt)
              && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
            continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
                                      &stmts_to_replace))
              break;