1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
45 /* Pattern recognition functions */
46 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
47 tree *);
48 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
49 tree *);
50 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51 tree *);
52 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
53 tree *);
54 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
55 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
58 tree *, tree *);
59 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
61 tree *, tree *);
62 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
63 tree *, tree *);
65 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
66 tree *, tree *);
68 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69 tree *, tree *);
70 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
71 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
72 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
73 tree *);
75 struct vect_recog_func
77 vect_recog_func_ptr fn;
78 const char *name;
81 /* Note that ordering matters - the first pattern matching on a stmt
82 is taken, which means the more complex one usually needs to precede
83 the less complex ones (widen_sum only after dot_prod or sad, for example). */
84 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
85 { vect_recog_widen_mult_pattern, "widen_mult" },
86 { vect_recog_dot_prod_pattern, "dot_prod" },
87 { vect_recog_sad_pattern, "sad" },
88 { vect_recog_widen_sum_pattern, "widen_sum" },
89 { vect_recog_pow_pattern, "pow" },
90 { vect_recog_widen_shift_pattern, "widen_shift" },
91 { vect_recog_over_widening_pattern, "over_widening" },
92 { vect_recog_rotate_pattern, "rotate" },
93 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
94 { vect_recog_divmod_pattern, "divmod" },
95 { vect_recog_mult_pattern, "mult" },
96 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
97 { vect_recog_bool_pattern, "bool" },
98 /* This must come before mask conversion, and includes the parts
99 of mask conversion that are needed for gather and scatter
100 internal functions. */
101 { vect_recog_gather_scatter_pattern, "gather_scatter" },
102 { vect_recog_mask_conversion_pattern, "mask_conversion" }
105 static inline void
106 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
108 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
109 stmt);
112 static inline void
113 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
115 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
116 append_pattern_def_seq (stmt_info, stmt);
119 /* Check whether STMT2 is in the same loop or basic block as STMT1.
120 Which of the two applies depends on whether we're currently doing
121 loop-based or basic-block-based vectorization, as determined by
122 the vinfo_for_stmt for STMT1 (which must be defined).
124 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
125 to be defined as well. */
127 static bool
128 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
130 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
131 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
134 /* If the LHS of DEF_STMT has a single use, and that statement is
135 in the same loop or basic block, return it. */
137 static gimple *
138 vect_single_imm_use (gimple *def_stmt)
140 tree lhs = gimple_assign_lhs (def_stmt);
141 use_operand_p use_p;
142 gimple *use_stmt;
144 if (!single_imm_use (lhs, &use_p, &use_stmt))
145 return NULL;
147 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
148 return NULL;
150 return use_stmt;
153 /* Check whether NAME, an ssa-name used in USE_STMT,
154 is a result of a type promotion, such that:
155 DEF_STMT: NAME = NOP (name0)
156 If CHECK_SIGN is TRUE, check that either both types are signed or both are
157 unsigned. */
159 static bool
160 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
161 tree *orig_type, gimple **def_stmt, bool *promotion)
163 gimple *dummy_gimple;
164 stmt_vec_info stmt_vinfo;
165 tree type = TREE_TYPE (name);
166 tree oprnd0;
167 enum vect_def_type dt;
169 stmt_vinfo = vinfo_for_stmt (use_stmt);
170 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
171 return false;
173 if (dt != vect_internal_def
174 && dt != vect_external_def && dt != vect_constant_def)
175 return false;
177 if (!*def_stmt)
178 return false;
180 if (dt == vect_internal_def)
182 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
183 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
184 return false;
187 if (!is_gimple_assign (*def_stmt))
188 return false;
190 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
191 return false;
193 oprnd0 = gimple_assign_rhs1 (*def_stmt);
195 *orig_type = TREE_TYPE (oprnd0);
196 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
197 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
198 return false;
200 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
201 *promotion = true;
202 else
203 *promotion = false;
205 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
206 return false;
208 return true;
211 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
212 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
214 static tree
215 vect_recog_temp_ssa_var (tree type, gimple *stmt)
217 return make_temp_ssa_name (type, stmt, "patt");
220 /* Return true if STMT_VINFO describes a reduction for which reassociation
221 is allowed. */
223 static bool
224 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
226 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
227 && STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION);
230 /* Function vect_recog_dot_prod_pattern
232 Try to find the following pattern:
234 type x_t, y_t;
235 TYPE1 prod;
236 TYPE2 sum = init;
237 loop:
238 sum_0 = phi <init, sum_1>
239 S1 x_t = ...
240 S2 y_t = ...
241 S3 x_T = (TYPE1) x_t;
242 S4 y_T = (TYPE1) y_t;
243 S5 prod = x_T * y_T;
244 [S6 prod = (TYPE2) prod; #optional]
245 S7 sum_1 = prod + sum_0;
247 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
248 same size as 'TYPE1' or bigger. This is a special case of a reduction
249 computation.
251 Input:
253 * STMTS: Contains a stmt from which the pattern search begins. In the
254 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
255 will be detected.
257 Output:
259 * TYPE_IN: The type of the input arguments to the pattern.
261 * TYPE_OUT: The type of the output of this pattern.
263 * Return value: A new stmt that will be used to replace the sequence of
264 stmts that constitute the pattern. In this case it will be:
265 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
267 Note: The dot-prod idiom is a widening reduction pattern that is
268 vectorized without preserving all the intermediate results. It
269 produces only N/2 (widened) results (by summing up pairs of
270 intermediate results) rather than all N results. Therefore, we
271 cannot allow this pattern when we want to get all the results and in
272 the correct order (as is the case when this computation is in an
273 inner-loop nested in an outer-loop that is being vectorized). */
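/* For illustration only (the array names and sizes below are hypothetical,
   not taken from this file): the classic source form of this idiom, with
   'type' = signed char, TYPE1 = short and TYPE2 = int, is

       signed char x[256], y[256];
       short prod;
       int sum = 0;
       for (int i = 0; i < 256; i++)
         {
           prod = x[i] * y[i];
           sum += prod;
         }

   which, once earlier folding has narrowed the multiplication to short,
   matches S3-S7 above and can be replaced by a DOT_PROD_EXPR reduction.  */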
275 static gimple *
276 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
277 tree *type_out)
279 gimple *stmt, *last_stmt = (*stmts)[0];
280 tree oprnd0, oprnd1;
281 tree oprnd00, oprnd01;
282 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
283 tree type, half_type;
284 gimple *pattern_stmt;
285 tree prod_type;
286 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
287 struct loop *loop;
288 tree var;
289 bool promotion;
291 if (!loop_info)
292 return NULL;
294 loop = LOOP_VINFO_LOOP (loop_info);
296 /* We don't allow changing the order of the computation in the inner-loop
297 when doing outer-loop vectorization. */
298 if (loop && nested_in_vect_loop_p (loop, last_stmt))
299 return NULL;
301 if (!is_gimple_assign (last_stmt))
302 return NULL;
304 type = gimple_expr_type (last_stmt);
306 /* Look for the following pattern
307 DX = (TYPE1) X;
308 DY = (TYPE1) Y;
309 DPROD = DX * DY;
310 DDPROD = (TYPE2) DPROD;
311 sum_1 = DDPROD + sum_0;
312 In which
313 - DX is double the size of X
314 - DY is double the size of Y
315 - DX, DY, DPROD all have the same type
316 - sum is the same size as DPROD or bigger
317 - sum has been recognized as a reduction variable.
319 This is equivalent to:
320 DPROD = X w* Y; #widen mult
321 sum_1 = DPROD w+ sum_0; #widen summation
323 DPROD = X w* Y; #widen mult
324 sum_1 = DPROD + sum_0; #summation
327 /* Starting from LAST_STMT, follow the defs of its uses in search
328 of the above pattern. */
330 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
331 return NULL;
333 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
335 /* Has been detected as widening-summation? */
337 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
338 type = gimple_expr_type (stmt);
339 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
340 return NULL;
341 oprnd0 = gimple_assign_rhs1 (stmt);
342 oprnd1 = gimple_assign_rhs2 (stmt);
343 half_type = TREE_TYPE (oprnd0);
345 else
347 gimple *def_stmt;
349 if (!vect_reassociating_reduction_p (stmt_vinfo)
350 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
351 return NULL;
352 oprnd0 = gimple_assign_rhs1 (last_stmt);
353 oprnd1 = gimple_assign_rhs2 (last_stmt);
354 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
355 || !types_compatible_p (TREE_TYPE (oprnd1), type))
356 return NULL;
357 stmt = last_stmt;
359 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
360 &promotion)
361 && promotion)
363 stmt = def_stmt;
364 oprnd0 = gimple_assign_rhs1 (stmt);
366 else
367 half_type = type;
370 /* So far so good. Since last_stmt was detected as a (summation) reduction,
371 we know that oprnd1 is the reduction variable (defined by a loop-header
372 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
373 Left to check that oprnd0 is defined by a (widen_)mult_expr */
374 if (TREE_CODE (oprnd0) != SSA_NAME)
375 return NULL;
377 prod_type = half_type;
378 stmt = SSA_NAME_DEF_STMT (oprnd0);
380 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
381 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
382 return NULL;
384 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
385 inside the loop (in case we are analyzing an outer-loop). */
386 if (!is_gimple_assign (stmt))
387 return NULL;
388 stmt_vinfo = vinfo_for_stmt (stmt);
389 gcc_assert (stmt_vinfo);
390 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
391 return NULL;
392 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
393 return NULL;
394 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
396 /* Has been detected as a widening multiplication? */
398 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
399 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
400 return NULL;
401 stmt_vinfo = vinfo_for_stmt (stmt);
402 gcc_assert (stmt_vinfo);
403 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
404 oprnd00 = gimple_assign_rhs1 (stmt);
405 oprnd01 = gimple_assign_rhs2 (stmt);
406 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
407 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
409 else
411 tree half_type0, half_type1;
412 gimple *def_stmt;
413 tree oprnd0, oprnd1;
415 oprnd0 = gimple_assign_rhs1 (stmt);
416 oprnd1 = gimple_assign_rhs2 (stmt);
417 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
418 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
419 return NULL;
420 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
421 &promotion)
422 || !promotion)
423 return NULL;
424 oprnd00 = gimple_assign_rhs1 (def_stmt);
425 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
426 &promotion)
427 || !promotion)
428 return NULL;
429 oprnd01 = gimple_assign_rhs1 (def_stmt);
430 if (!types_compatible_p (half_type0, half_type1))
431 return NULL;
432 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
433 return NULL;
436 half_type = TREE_TYPE (oprnd00);
437 *type_in = half_type;
438 *type_out = type;
440 /* Pattern detected. Create a stmt to be used to replace the pattern: */
441 var = vect_recog_temp_ssa_var (type, NULL);
442 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
443 oprnd00, oprnd01, oprnd1);
445 if (dump_enabled_p ())
447 dump_printf_loc (MSG_NOTE, vect_location,
448 "vect_recog_dot_prod_pattern: detected: ");
449 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
452 return pattern_stmt;
456 /* Function vect_recog_sad_pattern
458 Try to find the following Sum of Absolute Difference (SAD) pattern:
460 type x_t, y_t;
461 signed TYPE1 diff, abs_diff;
462 TYPE2 sum = init;
463 loop:
464 sum_0 = phi <init, sum_1>
465 S1 x_t = ...
466 S2 y_t = ...
467 S3 x_T = (TYPE1) x_t;
468 S4 y_T = (TYPE1) y_t;
469 S5 diff = x_T - y_T;
470 S6 abs_diff = ABS_EXPR <diff>;
471 [S7 abs_diff = (TYPE2) abs_diff; #optional]
472 S8 sum_1 = abs_diff + sum_0;
474 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
475 same size as 'TYPE1' or bigger. This is a special case of a reduction
476 computation.
478 Input:
480 * STMTS: Contains a stmt from which the pattern search begins. In the
481 example, when this function is called with S8, the pattern
482 {S3,S4,S5,S6,S7,S8} will be detected.
484 Output:
486 * TYPE_IN: The type of the input arguments to the pattern.
488 * TYPE_OUT: The type of the output of this pattern.
490 * Return value: A new stmt that will be used to replace the sequence of
491 stmts that constitute the pattern. In this case it will be:
492 SAD_EXPR <x_t, y_t, sum_0>
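/* For illustration only (hypothetical names): a typical source form of
   this idiom, with 'type' = unsigned char and TYPE1 = TYPE2 = int, is the
   classic sum-of-absolute-differences kernel

       unsigned char x[256], y[256];
       int sum = 0;
       for (int i = 0; i < 256; i++)
         sum += abs (x[i] - y[i]);

   where the subtraction and abs happen in int after the usual integer
   promotions; the loop body can then be replaced by a SAD_EXPR
   reduction.  */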
495 static gimple *
496 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
497 tree *type_out)
499 gimple *last_stmt = (*stmts)[0];
500 tree sad_oprnd0, sad_oprnd1;
501 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
502 tree half_type;
503 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
504 struct loop *loop;
505 bool promotion;
507 if (!loop_info)
508 return NULL;
510 loop = LOOP_VINFO_LOOP (loop_info);
512 /* We don't allow changing the order of the computation in the inner-loop
513 when doing outer-loop vectorization. */
514 if (loop && nested_in_vect_loop_p (loop, last_stmt))
515 return NULL;
517 if (!is_gimple_assign (last_stmt))
518 return NULL;
520 tree sum_type = gimple_expr_type (last_stmt);
522 /* Look for the following pattern
523 DX = (TYPE1) X;
524 DY = (TYPE1) Y;
525 DDIFF = DX - DY;
526 DAD = ABS_EXPR <DDIFF>;
527 DAD = (TYPE2) DAD;
528 sum_1 = DAD + sum_0;
529 In which
530 - DX is at least double the size of X
531 - DY is at least double the size of Y
532 - DX, DY, DDIFF, DAD all have the same type
533 - sum is the same size as DAD or bigger
534 - sum has been recognized as a reduction variable.
536 This is equivalent to:
537 DDIFF = X w- Y; #widen sub
538 DAD = ABS_EXPR <DDIFF>;
539 sum_1 = DAD w+ sum_0; #widen summation
541 DDIFF = X w- Y; #widen sub
542 DAD = ABS_EXPR <DDIFF>;
543 sum_1 = DAD + sum_0; #summation
546 /* Starting from LAST_STMT, follow the defs of its uses in search
547 of the above pattern. */
549 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
550 return NULL;
552 tree plus_oprnd0, plus_oprnd1;
554 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
556 /* Has been detected as widening-summation? */
558 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
559 sum_type = gimple_expr_type (stmt);
560 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
561 return NULL;
562 plus_oprnd0 = gimple_assign_rhs1 (stmt);
563 plus_oprnd1 = gimple_assign_rhs2 (stmt);
564 half_type = TREE_TYPE (plus_oprnd0);
566 else
568 gimple *def_stmt;
570 if (!vect_reassociating_reduction_p (stmt_vinfo)
571 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
572 return NULL;
573 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
574 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
575 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
576 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
577 return NULL;
579 /* The type conversion could be promotion, demotion,
580 or just signed -> unsigned. */
581 if (type_conversion_p (plus_oprnd0, last_stmt, false,
582 &half_type, &def_stmt, &promotion))
583 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
584 else
585 half_type = sum_type;
588 /* So far so good. Since last_stmt was detected as a (summation) reduction,
589 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
590 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
591 Then check that plus_oprnd0 is defined by an abs_expr. */
593 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
594 return NULL;
596 tree abs_type = half_type;
597 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
599 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
600 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
601 return NULL;
603 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
604 inside the loop (in case we are analyzing an outer-loop). */
605 if (!is_gimple_assign (abs_stmt))
606 return NULL;
608 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
609 gcc_assert (abs_stmt_vinfo);
610 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
611 return NULL;
612 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
613 return NULL;
615 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
616 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
617 return NULL;
618 if (TYPE_UNSIGNED (abs_type))
619 return NULL;
621 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
623 if (TREE_CODE (abs_oprnd) != SSA_NAME)
624 return NULL;
626 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
628 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
629 if (!gimple_bb (diff_stmt)
630 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
631 return NULL;
633 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
634 inside the loop (in case we are analyzing an outer-loop). */
635 if (!is_gimple_assign (diff_stmt))
636 return NULL;
638 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
639 gcc_assert (diff_stmt_vinfo);
640 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
641 return NULL;
642 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
643 return NULL;
645 tree half_type0, half_type1;
646 gimple *def_stmt;
648 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
649 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
651 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
652 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
653 return NULL;
654 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
655 &half_type0, &def_stmt, &promotion)
656 || !promotion)
657 return NULL;
658 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
660 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
661 &half_type1, &def_stmt, &promotion)
662 || !promotion)
663 return NULL;
664 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
666 if (!types_compatible_p (half_type0, half_type1))
667 return NULL;
668 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
669 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
670 return NULL;
672 *type_in = TREE_TYPE (sad_oprnd0);
673 *type_out = sum_type;
675 /* Pattern detected. Create a stmt to be used to replace the pattern: */
676 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
677 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
678 sad_oprnd1, plus_oprnd1);
680 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE, vect_location,
683 "vect_recog_sad_pattern: detected: ");
684 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
687 return pattern_stmt;
691 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
692 and LSHIFT_EXPR.
694 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
695 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
697 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
698 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
699 that satisfies the above restrictions, we can perform a widening operation
700 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
701 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
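/* For illustration only (hypothetical values): with HALF_TYPE = unsigned
   char and TYPE = unsigned int, a statement pair such as

       a_T = (unsigned int) a_t;
       prod_T = a_T * 300;

   cannot be widened directly from unsigned char because 300 does not fit
   that type, but since TYPE is 4 times as wide we can instead widen from
   the intermediate unsigned short type by creating
   a_it = (unsigned short) a_t.  */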
703 static bool
704 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
705 tree const_oprnd, tree *oprnd,
706 gimple **wstmt, tree type,
707 tree *half_type, gimple *def_stmt)
709 tree new_type, new_oprnd;
711 if (code != MULT_EXPR && code != LSHIFT_EXPR)
712 return false;
714 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
715 || (code == LSHIFT_EXPR
716 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
717 != 1))
718 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
720 /* CONST_OPRND is a constant of HALF_TYPE. */
721 *oprnd = gimple_assign_rhs1 (def_stmt);
722 return true;
725 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
726 return false;
728 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
729 return false;
731 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
732 a type 2 times bigger than HALF_TYPE. */
733 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
734 TYPE_UNSIGNED (type));
735 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
736 || (code == LSHIFT_EXPR
737 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
738 return false;
740 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
741 *oprnd = gimple_assign_rhs1 (def_stmt);
742 new_oprnd = make_ssa_name (new_type);
743 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
744 *oprnd = new_oprnd;
746 *half_type = new_type;
747 return true;
751 /* Function vect_recog_widen_mult_pattern
753 Try to find the following pattern:
755 type1 a_t;
756 type2 b_t;
757 TYPE a_T, b_T, prod_T;
759 S1 a_t = ;
760 S2 b_t = ;
761 S3 a_T = (TYPE) a_t;
762 S4 b_T = (TYPE) b_t;
763 S5 prod_T = a_T * b_T;
765 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
767 Also detect unsigned cases:
769 unsigned type1 a_t;
770 unsigned type2 b_t;
771 unsigned TYPE u_prod_T;
772 TYPE a_T, b_T, prod_T;
774 S1 a_t = ;
775 S2 b_t = ;
776 S3 a_T = (TYPE) a_t;
777 S4 b_T = (TYPE) b_t;
778 S5 prod_T = a_T * b_T;
779 S6 u_prod_T = (unsigned TYPE) prod_T;
781 and multiplication by constants:
783 type a_t;
784 TYPE a_T, prod_T;
786 S1 a_t = ;
787 S3 a_T = (TYPE) a_t;
788 S5 prod_T = a_T * CONST;
790 A special case of multiplication by constants is when 'TYPE' is 4 times
791 bigger than 'type', but CONST fits an intermediate type 2 times smaller
792 than 'TYPE'. In that case we create an additional pattern stmt for S3
793 to create a variable of the intermediate type, and perform widen-mult
794 on the intermediate type as well:
796 type a_t;
797 interm_type a_it;
798 TYPE a_T, prod_T, prod_T';
800 S1 a_t = ;
801 S3 a_T = (TYPE) a_t;
802 '--> a_it = (interm_type) a_t;
803 S5 prod_T = a_T * CONST;
804 '--> prod_T' = a_it w* CONST;
806 Input/Output:
808 * STMTS: Contains a stmt from which the pattern search begins. In the
809 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
810 is detected. In case of unsigned widen-mult, the original stmt (S5) is
811 replaced with S6 in STMTS. In case of multiplication by a constant
812 of an intermediate type (the last case above), STMTS also contains S3
813 (inserted before S5).
815 Output:
817 * TYPE_IN: The type of the input arguments to the pattern.
819 * TYPE_OUT: The type of the output of this pattern.
821 * Return value: A new stmt that will be used to replace the sequence of
822 stmts that constitute the pattern. In this case it will be:
823 WIDEN_MULT <a_t, b_t>
824 If the result of WIDEN_MULT needs to be converted to a larger type, the
825 returned stmt will be this type conversion stmt.
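/* For illustration only (hypothetical names): the simplest source form
   of this pattern, with type1 = type2 = short and TYPE = int, is

       short a[256], b[256];
       int c[256];
       for (int i = 0; i < 256; i++)
         c[i] = a[i] * b[i];

   where the implicit promotions of a[i] and b[i] correspond to S3/S4
   and the multiplication to S5, so the sequence can be replaced by a
   single WIDEN_MULT_EXPR per vector iteration.  */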
828 static gimple *
829 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
830 tree *type_in, tree *type_out)
832 gimple *last_stmt = stmts->pop ();
833 gimple *def_stmt0, *def_stmt1;
834 tree oprnd0, oprnd1;
835 tree type, half_type0, half_type1;
836 gimple *new_stmt = NULL, *pattern_stmt = NULL;
837 tree vectype, vecitype;
838 tree var;
839 enum tree_code dummy_code;
840 int dummy_int;
841 vec<tree> dummy_vec;
842 bool op1_ok;
843 bool promotion;
845 if (!is_gimple_assign (last_stmt))
846 return NULL;
848 type = gimple_expr_type (last_stmt);
850 /* Starting from LAST_STMT, follow the defs of its uses in search
851 of the above pattern. */
853 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
854 return NULL;
856 oprnd0 = gimple_assign_rhs1 (last_stmt);
857 oprnd1 = gimple_assign_rhs2 (last_stmt);
858 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
859 || !types_compatible_p (TREE_TYPE (oprnd1), type))
860 return NULL;
862 /* Check argument 0. */
863 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
864 &promotion)
865 || !promotion)
866 return NULL;
867 /* Check argument 1. */
868 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
869 &def_stmt1, &promotion);
871 if (op1_ok && promotion)
873 oprnd0 = gimple_assign_rhs1 (def_stmt0);
874 oprnd1 = gimple_assign_rhs1 (def_stmt1);
876 else
878 if (TREE_CODE (oprnd1) == INTEGER_CST
879 && TREE_CODE (half_type0) == INTEGER_TYPE
880 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
881 &oprnd0, &new_stmt, type,
882 &half_type0, def_stmt0))
884 half_type1 = half_type0;
885 oprnd1 = fold_convert (half_type1, oprnd1);
887 else
888 return NULL;
891 /* If the two arguments have different sizes, convert the one with
892 the smaller type into the larger type. */
893 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
895 /* If we already used up the single-stmt slot give up. */
896 if (new_stmt)
897 return NULL;
899 tree* oprnd = NULL;
900 gimple *def_stmt = NULL;
902 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
904 def_stmt = def_stmt0;
905 half_type0 = half_type1;
906 oprnd = &oprnd0;
908 else
910 def_stmt = def_stmt1;
911 half_type1 = half_type0;
912 oprnd = &oprnd1;
915 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
916 tree new_oprnd = make_ssa_name (half_type0);
917 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
918 *oprnd = new_oprnd;
921 /* Handle unsigned case. Look for
922 S6 u_prod_T = (unsigned TYPE) prod_T;
923 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
924 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
926 gimple *use_stmt;
927 tree use_lhs;
928 tree use_type;
930 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
931 return NULL;
933 use_stmt = vect_single_imm_use (last_stmt);
934 if (!use_stmt || !is_gimple_assign (use_stmt)
935 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
936 return NULL;
938 use_lhs = gimple_assign_lhs (use_stmt);
939 use_type = TREE_TYPE (use_lhs);
940 if (!INTEGRAL_TYPE_P (use_type)
941 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
942 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
943 return NULL;
945 type = use_type;
946 last_stmt = use_stmt;
949 if (!types_compatible_p (half_type0, half_type1))
950 return NULL;
952 /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
953 to get an intermediate result of type ITYPE. In this case we need
954 to build a statement to convert this intermediate result to type TYPE. */
955 tree itype = type;
956 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
957 itype = build_nonstandard_integer_type
958 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
959 TYPE_UNSIGNED (type));
961 /* Pattern detected. */
962 if (dump_enabled_p ())
963 dump_printf_loc (MSG_NOTE, vect_location,
964 "vect_recog_widen_mult_pattern: detected:\n");
966 /* Check target support */
967 vectype = get_vectype_for_scalar_type (half_type0);
968 vecitype = get_vectype_for_scalar_type (itype);
969 if (!vectype
970 || !vecitype
971 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
972 vecitype, vectype,
973 &dummy_code, &dummy_code,
974 &dummy_int, &dummy_vec))
975 return NULL;
977 *type_in = vectype;
978 *type_out = get_vectype_for_scalar_type (type);
980 /* Pattern supported. Create a stmt to be used to replace the pattern: */
981 var = vect_recog_temp_ssa_var (itype, NULL);
982 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
984 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
985 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
987 /* If the original two operands have different sizes, we may need to convert
988 the smaller one into the larger type. If this is the case, at this point
989 the new stmt is already built. */
990 if (new_stmt)
992 append_pattern_def_seq (stmt_vinfo, new_stmt);
993 stmt_vec_info new_stmt_info
994 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
995 set_vinfo_for_stmt (new_stmt, new_stmt_info);
996 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
999 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
1000 the result of the widen-mult operation into type TYPE. */
1001 if (itype != type)
1003 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1004 stmt_vec_info pattern_stmt_info
1005 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
1006 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1007 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1008 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1009 NOP_EXPR,
1010 gimple_assign_lhs (pattern_stmt));
1013 if (dump_enabled_p ())
1014 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1016 stmts->safe_push (last_stmt);
1017 return pattern_stmt;
1021 /* Function vect_recog_pow_pattern
1023 Try to find the following pattern:
1025 x = POW (y, N);
1027 with POW being one of pow, powf, powi, powif and N being
1028 either 2 or 0.5.
1030 Input:
1032 * LAST_STMT: A stmt from which the pattern search begins.
1034 Output:
1036 * TYPE_IN: The type of the input arguments to the pattern.
1038 * TYPE_OUT: The type of the output of this pattern.
1040 * Return value: A new stmt that will be used to replace the sequence of
1041 stmts that constitute the pattern. In this case it will be:
1042 x = x * x
1044 x = sqrt (x)
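/* For illustration only: the transformation replaces e.g.

       y = pow (x, 2.0);   with   y = x * x;
       y = pow (x, 0.5);   with   y = sqrt (x);

   where the sqrt form is only used if the target provides a vectorizable
   IFN_SQRT (checked below).  */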
1047 static gimple *
1048 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1049 tree *type_out)
1051 gimple *last_stmt = (*stmts)[0];
1052 tree base, exp = NULL;
1053 gimple *stmt;
1054 tree var;
1056 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1057 return NULL;
1059 switch (gimple_call_combined_fn (last_stmt))
1061 CASE_CFN_POW:
1062 CASE_CFN_POWI:
1063 base = gimple_call_arg (last_stmt, 0);
1064 exp = gimple_call_arg (last_stmt, 1);
1065 if (TREE_CODE (exp) != REAL_CST
1066 && TREE_CODE (exp) != INTEGER_CST)
1067 return NULL;
1068 break;
1070 default:
1071 return NULL;
1074 /* We now have a pow or powi builtin function call with a constant
1075 exponent. */
1077 *type_out = NULL_TREE;
1079 /* Catch squaring. */
1080 if ((tree_fits_shwi_p (exp)
1081 && tree_to_shwi (exp) == 2)
1082 || (TREE_CODE (exp) == REAL_CST
1083 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1085 *type_in = TREE_TYPE (base);
1087 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1088 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1089 return stmt;
1092 /* Catch square root. */
1093 if (TREE_CODE (exp) == REAL_CST
1094 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1096 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1097 if (*type_in
1098 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1099 OPTIMIZE_FOR_SPEED))
1101 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1102 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1103 gimple_call_set_lhs (stmt, var);
1104 gimple_call_set_nothrow (stmt, true);
1105 return stmt;
1109 return NULL;
1113 /* Function vect_recog_widen_sum_pattern
1115 Try to find the following pattern:
1117 type x_t;
1118 TYPE x_T, sum = init;
1119 loop:
1120 sum_0 = phi <init, sum_1>
1121 S1 x_t = *p;
1122 S2 x_T = (TYPE) x_t;
1123 S3 sum_1 = x_T + sum_0;
1125 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1126 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1127 a special case of a reduction computation.
1129 Input:
1131 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1132 when this function is called with S3, the pattern {S2,S3} will be detected.
1134 Output:
1136 * TYPE_IN: The type of the input arguments to the pattern.
1138 * TYPE_OUT: The type of the output of this pattern.
1140 * Return value: A new stmt that will be used to replace the sequence of
1141 stmts that constitute the pattern. In this case it will be:
1142 WIDEN_SUM <x_t, sum_0>
1144 Note: The widening-sum idiom is a widening reduction pattern that is
1145 vectorized without preserving all the intermediate results. It
1146 produces only N/2 (widened) results (by summing up pairs of
1147 intermediate results) rather than all N results. Therefore, we
1148 cannot allow this pattern when we want to get all the results and in
1149 the correct order (as is the case when this computation is in an
1150 inner-loop nested in an outer-loop that is being vectorized). */
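/* For illustration only (hypothetical names): with 'type' = unsigned
   short and TYPE = unsigned int, the loop

       unsigned short x[256];
       unsigned int sum = 0;
       for (int i = 0; i < 256; i++)
         sum += x[i];

   matches S1-S3 above and can be replaced by a WIDEN_SUM_EXPR
   reduction.  */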
1152 static gimple *
1153 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1154 tree *type_out)
1156 gimple *stmt, *last_stmt = (*stmts)[0];
1157 tree oprnd0, oprnd1;
1158 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1159 tree type, half_type;
1160 gimple *pattern_stmt;
1161 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1162 struct loop *loop;
1163 tree var;
1164 bool promotion;
1166 if (!loop_info)
1167 return NULL;
1169 loop = LOOP_VINFO_LOOP (loop_info);
1171 /* We don't allow changing the order of the computation in the inner-loop
1172 when doing outer-loop vectorization. */
1173 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1174 return NULL;
1176 if (!is_gimple_assign (last_stmt))
1177 return NULL;
1179 type = gimple_expr_type (last_stmt);
1181 /* Look for the following pattern
1182 DX = (TYPE) X;
1183 sum_1 = DX + sum_0;
1184 In which DX is at least double the size of X, and sum_1 has been
1185 recognized as a reduction variable.
1188 /* Starting from LAST_STMT, follow the defs of its uses in search
1189 of the above pattern. */
1191 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1192 return NULL;
1194 if (!vect_reassociating_reduction_p (stmt_vinfo)
1195 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
1196 return NULL;
1198 oprnd0 = gimple_assign_rhs1 (last_stmt);
1199 oprnd1 = gimple_assign_rhs2 (last_stmt);
1200 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1201 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1202 return NULL;
1204 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1205 we know that oprnd1 is the reduction variable (defined by a loop-header
1206 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1207 Left to check that oprnd0 is defined by a cast from type 'type' to type
1208 'TYPE'. */
1210 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1211 &promotion)
1212 || !promotion)
1213 return NULL;
1215 oprnd0 = gimple_assign_rhs1 (stmt);
1216 *type_in = half_type;
1217 *type_out = type;
1219 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1220 var = vect_recog_temp_ssa_var (type, NULL);
1221 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1223 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_recog_widen_sum_pattern: detected: ");
1227 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1230 return pattern_stmt;
1234 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1236 Input:
1237 STMT - a statement to check.
1238 DEF - we support operations with two operands, one of which is constant.
1239 The other operand can be defined by a demotion operation, or by a
1240 previous statement in a sequence of over-promoted operations. In the
1241 latter case DEF is used to replace that operand. (It is defined by a
1242 pattern statement we created for the previous statement in the
1243 sequence).
1245 Input/output:
1246 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1247 NULL, it's the type of DEF.
1248 STMTS - additional pattern statements. If a pattern statement (type
1249 conversion) is created in this function, its original statement is
1250 added to STMTS.
1252 Output:
1253 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1254 operands to use in the new pattern statement for STMT (will be created
1255 in vect_recog_over_widening_pattern ()).
1256 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1257 statements for STMT: the first one is a type promotion and the second
1258 one is the operation itself. We return the type promotion statement
1259 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1260 the second pattern statement. */
1262 static bool
1263 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1264 tree *op0, tree *op1, gimple **new_def_stmt,
1265 vec<gimple *> *stmts)
1267 enum tree_code code;
1268 tree const_oprnd, oprnd;
1269 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1270 gimple *def_stmt, *new_stmt;
1271 bool first = false;
1272 bool promotion;
1274 *op0 = NULL_TREE;
1275 *op1 = NULL_TREE;
1276 *new_def_stmt = NULL;
1278 if (!is_gimple_assign (stmt))
1279 return false;
1281 code = gimple_assign_rhs_code (stmt);
1282 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1283 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1284 return false;
1286 oprnd = gimple_assign_rhs1 (stmt);
1287 const_oprnd = gimple_assign_rhs2 (stmt);
1288 type = gimple_expr_type (stmt);
1290 if (TREE_CODE (oprnd) != SSA_NAME
1291 || TREE_CODE (const_oprnd) != INTEGER_CST)
1292 return false;
1294 /* If oprnd has other uses besides that in stmt we cannot mark it
1295 as being part of a pattern only. */
1296 if (!has_single_use (oprnd))
1297 return false;
1299 /* If we are in the middle of a sequence, we use DEF from a previous
1300 statement. Otherwise, OPRND has to be a result of type promotion. */
1301 if (*new_type)
1303 half_type = *new_type;
1304 oprnd = def;
1306 else
1308 first = true;
1309 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1310 &promotion)
1311 || !promotion
1312 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1313 return false;
1316 /* Can we perform the operation on a smaller type? */
1317 switch (code)
1319 case BIT_IOR_EXPR:
1320 case BIT_XOR_EXPR:
1321 case BIT_AND_EXPR:
1322 if (!int_fits_type_p (const_oprnd, half_type))
1324 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1325 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1326 return false;
1328 interm_type = build_nonstandard_integer_type (
1329 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1330 if (!int_fits_type_p (const_oprnd, interm_type))
1331 return false;
1334 break;
1336 case LSHIFT_EXPR:
1337 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1338 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1339 return false;
1341 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1342 (e.g., if the original value was char, the shift amount is at most 8
1343 if we want to use short). */
1344 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1345 return false;
1347 interm_type = build_nonstandard_integer_type (
1348 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1350 if (!vect_supportable_shift (code, interm_type))
1351 return false;
1353 break;
1355 case RSHIFT_EXPR:
1356 if (vect_supportable_shift (code, half_type))
1357 break;
1359 /* Try intermediate type - HALF_TYPE is not supported. */
1360 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1361 return false;
1363 interm_type = build_nonstandard_integer_type (
1364 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1366 if (!vect_supportable_shift (code, interm_type))
1367 return false;
1369 break;
1371 default:
1372 gcc_unreachable ();
1375 /* There are four possible cases:
1376 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1377 the first statement in the sequence)
1378 a. The original, HALF_TYPE, is not enough - we replace the promotion
1379 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1380 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1381 promotion.
1382 2. OPRND is defined by a pattern statement we created.
1383 a. Its type is not sufficient for the operation, we create a new stmt:
1384 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1385 this statement in NEW_DEF_STMT, and it is later put in
1386 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1387 b. OPRND is good to use in the new statement. */
1388 if (first)
1390 if (interm_type)
1392 /* Replace the original type conversion HALF_TYPE->TYPE with
1393 HALF_TYPE->INTERM_TYPE. */
1394 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1396 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1397 /* Check if the already created pattern stmt is what we need. */
1398 if (!is_gimple_assign (new_stmt)
1399 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1400 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1401 return false;
1403 stmts->safe_push (def_stmt);
1404 oprnd = gimple_assign_lhs (new_stmt);
1406 else
1408 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1409 oprnd = gimple_assign_rhs1 (def_stmt);
1410 new_oprnd = make_ssa_name (interm_type);
1411 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1412 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1413 stmts->safe_push (def_stmt);
1414 oprnd = new_oprnd;
1417 else
1419 /* Retrieve the operand before the type promotion. */
1420 oprnd = gimple_assign_rhs1 (def_stmt);
1423 else
1425 if (interm_type)
1427 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1428 new_oprnd = make_ssa_name (interm_type);
1429 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1430 oprnd = new_oprnd;
1431 *new_def_stmt = new_stmt;
1434 /* Otherwise, OPRND is already set. */
1437 if (interm_type)
1438 *new_type = interm_type;
1439 else
1440 *new_type = half_type;
1442 *op0 = oprnd;
1443 *op1 = fold_convert (*new_type, const_oprnd);
1445 return true;
1449 /* Try to find a statement or a sequence of statements that can be performed
1450 on a smaller type:
1452 type x_t;
1453 TYPE x_T, res0_T, res1_T;
1454 loop:
1455 S1 x_t = *p;
1456 S2 x_T = (TYPE) x_t;
1457 S3 res0_T = op (x_T, C0);
1458 S4 res1_T = op (res0_T, C1);
1459 S5 ... = () res1_T; - type demotion
1461 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1462 constants.
1463 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1464 be 'type' or some intermediate type. For now, we expect S5 to be a type
1465 demotion operation. We also check that S3 and S4 have only one use. */
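/* For illustration only (hypothetical names): because C promotes narrow
   operands to int, a loop such as

       unsigned char in[256], out[256];
       for (int i = 0; i < 256; i++)
         out[i] = (in[i] << 2) | 3;

   is expressed in GIMPLE with the shift and bitwise OR performed in int,
   even though the result is immediately demoted back to unsigned char.
   This pattern detects that the operations can be carried out in a
   narrower type instead.  */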
1467 static gimple *
1468 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1469 tree *type_in, tree *type_out)
1471 gimple *stmt = stmts->pop ();
1472 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1473 *use_stmt = NULL;
1474 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1475 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1476 bool first;
1477 tree type = NULL;
1479 first = true;
1480 while (1)
1482 if (!vinfo_for_stmt (stmt)
1483 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1484 return NULL;
1486 new_def_stmt = NULL;
1487 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1488 &op0, &op1, &new_def_stmt,
1489 stmts))
1491 if (first)
1492 return NULL;
1493 else
1494 break;
1497 /* STMT can be performed on a smaller type. Check its uses. */
1498 use_stmt = vect_single_imm_use (stmt);
1499 if (!use_stmt || !is_gimple_assign (use_stmt))
1500 return NULL;
1502 /* Create pattern statement for STMT. */
1503 vectype = get_vectype_for_scalar_type (new_type);
1504 if (!vectype)
1505 return NULL;
1507 /* We want to collect all the statements for which we create pattern
1508 statements, except for the case when the last statement in the
1509 sequence doesn't have a corresponding pattern statement. In such
1510 a case we associate the last pattern statement with the last statement
1511 in the sequence. Therefore, we only add the original statement to
1512 the list if we know that it is not the last. */
1513 if (prev_stmt)
1514 stmts->safe_push (prev_stmt);
1516 var = vect_recog_temp_ssa_var (new_type, NULL);
1517 pattern_stmt
1518 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1519 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1520 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1522 if (dump_enabled_p ())
1524 dump_printf_loc (MSG_NOTE, vect_location,
1525 "created pattern stmt: ");
1526 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1529 type = gimple_expr_type (stmt);
1530 prev_stmt = stmt;
1531 stmt = use_stmt;
1533 first = false;
1536 /* We got a sequence. We expect it to end with a type demotion operation.
1537 Otherwise, we quit (for now). There are three possible cases: the
1538 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1539 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1540 NEW_TYPE differs (we create a new conversion statement). */
1541 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1543 use_lhs = gimple_assign_lhs (use_stmt);
1544 use_type = TREE_TYPE (use_lhs);
1545 /* Support only type demotion or signedness change. */
1546 if (!INTEGRAL_TYPE_P (use_type)
1547 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1548 return NULL;
1550 /* Check that NEW_TYPE is not bigger than the conversion result. */
1551 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1552 return NULL;
1554 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1555 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1557 /* Create NEW_TYPE->USE_TYPE conversion. */
1558 new_oprnd = make_ssa_name (use_type);
1559 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1560 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1562 *type_in = get_vectype_for_scalar_type (new_type);
1563 *type_out = get_vectype_for_scalar_type (use_type);
1565 /* We created a pattern statement for the last statement in the
1566 sequence, so we don't need to associate it with the pattern
1567 statement created for PREV_STMT. Therefore, we add PREV_STMT
1568 to the list in order to mark it later in vect_pattern_recog_1. */
1569 if (prev_stmt)
1570 stmts->safe_push (prev_stmt);
1572 else
1574 if (prev_stmt)
1575 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1576 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1578 *type_in = vectype;
1579 *type_out = NULL_TREE;
1582 stmts->safe_push (use_stmt);
1584 else
1585 /* TODO: support general case, create a conversion to the correct type. */
1586 return NULL;
1588 /* Pattern detected. */
1589 if (dump_enabled_p ())
1591 dump_printf_loc (MSG_NOTE, vect_location,
1592 "vect_recog_over_widening_pattern: detected: ");
1593 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1596 return pattern_stmt;
1599 /* Detect widening shift pattern:
1601 type a_t;
1602 TYPE a_T, res_T;
1604 S1 a_t = ;
1605 S2 a_T = (TYPE) a_t;
1606 S3 res_T = a_T << CONST;
1608 where type 'TYPE' is at least double the size of type 'type'.
1610 Also detect cases where the shift result is immediately converted
1611 to another type 'result_type' that is no larger in size than 'TYPE'.
1612 In those cases we perform a widen-shift that directly results in
1613 'result_type', to avoid a possible over-widening situation:
1615 type a_t;
1616 TYPE a_T, res_T;
1617 result_type res_result;
1619 S1 a_t = ;
1620 S2 a_T = (TYPE) a_t;
1621 S3 res_T = a_T << CONST;
1622 S4 res_result = (result_type) res_T;
1623 '--> res_result' = a_t w<< CONST;
1625 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1626 create an additional pattern stmt for S2 to create a variable of an
1627 intermediate type, and perform widen-shift on the intermediate type:
1629 type a_t;
1630 interm_type a_it;
1631 TYPE a_T, res_T, res_T';
1633 S1 a_t = ;
1634 S2 a_T = (TYPE) a_t;
1635 '--> a_it = (interm_type) a_t;
1636 S3 res_T = a_T << CONST;
1637 '--> res_T' = a_it <<* CONST;
1639 Input/Output:
1641 * STMTS: Contains a stmt from which the pattern search begins.
1642 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1643 in STMTS. When an intermediate type is used and a pattern statement is
1644 created for S2, we also put S2 here (before S3).
1646 Output:
1648 * TYPE_IN: The type of the input arguments to the pattern.
1650 * TYPE_OUT: The type of the output of this pattern.
1652 * Return value: A new stmt that will be used to replace the sequence of
1653 stmts that constitute the pattern. In this case it will be:
1654 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
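/* For illustration only (hypothetical names): with 'type' = unsigned
   short and TYPE = unsigned int, the loop

       unsigned short in[256];
       unsigned int out[256];
       for (int i = 0; i < 256; i++)
         out[i] = (unsigned int) in[i] << 7;

   matches S2/S3 above and can be replaced by a WIDEN_LSHIFT_EXPR where
   the target supports widening vector shifts.  */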
1656 static gimple *
1657 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1658 tree *type_in, tree *type_out)
1660 gimple *last_stmt = stmts->pop ();
1661 gimple *def_stmt0;
1662 tree oprnd0, oprnd1;
1663 tree type, half_type0;
1664 gimple *pattern_stmt;
1665 tree vectype, vectype_out = NULL_TREE;
1666 tree var;
1667 enum tree_code dummy_code;
1668 int dummy_int;
1669 vec<tree> dummy_vec;
1670 gimple *use_stmt;
1671 bool promotion;
1673 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1674 return NULL;
1676 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1677 return NULL;
1679 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1680 return NULL;
1682 oprnd0 = gimple_assign_rhs1 (last_stmt);
1683 oprnd1 = gimple_assign_rhs2 (last_stmt);
1684 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1685 return NULL;
1687 /* Check operand 0: it has to be defined by a type promotion. */
1688 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1689 &promotion)
1690 || !promotion)
1691 return NULL;
1693 /* Check operand 1: has to be positive. We check that it fits the type
1694 in vect_handle_widen_op_by_const (). */
1695 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1696 return NULL;
1698 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1699 type = gimple_expr_type (last_stmt);
1701 /* Check for subsequent conversion to another type. */
1702 use_stmt = vect_single_imm_use (last_stmt);
1703 if (use_stmt && is_gimple_assign (use_stmt)
1704 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1705 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1707 tree use_lhs = gimple_assign_lhs (use_stmt);
1708 tree use_type = TREE_TYPE (use_lhs);
1710 if (INTEGRAL_TYPE_P (use_type)
1711 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1713 last_stmt = use_stmt;
1714 type = use_type;
1718 /* Check if this is a widening operation. */
1719 gimple *wstmt = NULL;
1720 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1721 &oprnd0, &wstmt,
1722 type, &half_type0, def_stmt0))
1723 return NULL;
1725 /* Pattern detected. */
1726 if (dump_enabled_p ())
1727 dump_printf_loc (MSG_NOTE, vect_location,
1728 "vect_recog_widen_shift_pattern: detected:\n");
1730 /* Check target support. */
1731 vectype = get_vectype_for_scalar_type (half_type0);
1732 vectype_out = get_vectype_for_scalar_type (type);
1734 if (!vectype
1735 || !vectype_out
1736 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1737 vectype_out, vectype,
1738 &dummy_code, &dummy_code,
1739 &dummy_int, &dummy_vec))
1740 return NULL;
1742 *type_in = vectype;
1743 *type_out = vectype_out;
1745 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1746 var = vect_recog_temp_ssa_var (type, NULL);
1747 pattern_stmt =
1748 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1749 if (wstmt)
1751 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1752 new_pattern_def_seq (stmt_vinfo, wstmt);
1753 stmt_vec_info new_stmt_info
1754 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1755 set_vinfo_for_stmt (wstmt, new_stmt_info);
1756 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1759 if (dump_enabled_p ())
1760 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1762 stmts->safe_push (last_stmt);
1763 return pattern_stmt;
1766 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1768 type a_t, b_t, c_t;
1770 S0 a_t = b_t r<< c_t;
1772 Input/Output:
1774 * STMTS: Contains a stmt from which the pattern search begins,
1775 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1776 with a sequence:
1778 S1 d_t = -c_t;
1779 S2 e_t = d_t & (B - 1);
1780 S3 f_t = b_t << c_t;
1781 S4 g_t = b_t >> e_t;
1782 S0 a_t = f_t | g_t;
1784 where B is element bitsize of type.
1786 Output:
1788 * TYPE_IN: The type of the input arguments to the pattern.
1790 * TYPE_OUT: The type of the output of this pattern.
1792 * Return value: A new stmt that will be used to replace the rotate
1793 S0 stmt. */
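/* For illustration only (hypothetical names): a source-level rotate is
   typically written as

       unsigned int x, n;
       x = (x << n) | (x >> ((-n) & 31));

   which earlier folding recognizes as an LROTATE_EXPR.  When the target
   has no vector rotate, the S1-S4/S0 sequence above re-expands it into
   shifts and a bitwise OR that can be vectorized.  */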
1795 static gimple *
1796 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1798 gimple *last_stmt = stmts->pop ();
1799 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1800 gimple *pattern_stmt, *def_stmt;
1801 enum tree_code rhs_code;
1802 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1803 vec_info *vinfo = stmt_vinfo->vinfo;
1804 enum vect_def_type dt;
1805 optab optab1, optab2;
1806 edge ext_def = NULL;
1808 if (!is_gimple_assign (last_stmt))
1809 return NULL;
1811 rhs_code = gimple_assign_rhs_code (last_stmt);
1812 switch (rhs_code)
1814 case LROTATE_EXPR:
1815 case RROTATE_EXPR:
1816 break;
1817 default:
1818 return NULL;
1821 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1822 return NULL;
1824 lhs = gimple_assign_lhs (last_stmt);
1825 oprnd0 = gimple_assign_rhs1 (last_stmt);
1826 type = TREE_TYPE (oprnd0);
1827 oprnd1 = gimple_assign_rhs2 (last_stmt);
1828 if (TREE_CODE (oprnd0) != SSA_NAME
1829 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1830 || !INTEGRAL_TYPE_P (type)
1831 || !TYPE_UNSIGNED (type))
1832 return NULL;
1834 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1835 return NULL;
1837 if (dt != vect_internal_def
1838 && dt != vect_constant_def
1839 && dt != vect_external_def)
1840 return NULL;
1842 vectype = get_vectype_for_scalar_type (type);
1843 if (vectype == NULL_TREE)
1844 return NULL;
1846 /* If vector/vector or vector/scalar rotate is supported by the target,
1847 don't do anything here. */
1848 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1849 if (optab1
1850 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1851 return NULL;
1853 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1855 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1856 if (optab2
1857 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1858 return NULL;
1861 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1862 don't do anything here either. */
1863 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1864 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1865 if (!optab1
1866 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1867 || !optab2
1868 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1870 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1871 return NULL;
1872 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1873 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1874 if (!optab1
1875 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1876 || !optab2
1877 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1878 return NULL;
1881 *type_in = vectype;
1882 *type_out = vectype;
1883 if (*type_in == NULL_TREE)
1884 return NULL;
1886 if (dt == vect_external_def
1887 && TREE_CODE (oprnd1) == SSA_NAME
1888 && is_a <loop_vec_info> (vinfo))
1890 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1891 ext_def = loop_preheader_edge (loop);
1892 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1894 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1895 if (bb == NULL
1896 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1897 ext_def = NULL;
1901 def = NULL_TREE;
1902 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1903 if (TREE_CODE (oprnd1) == INTEGER_CST
1904 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1905 def = oprnd1;
1906 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1908 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1909 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1910 && TYPE_PRECISION (TREE_TYPE (rhs1))
1911 == TYPE_PRECISION (type))
1912 def = rhs1;
1915 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1916 if (def == NULL_TREE)
1918 def = vect_recog_temp_ssa_var (type, NULL);
1919 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1920 if (ext_def)
1922 basic_block new_bb
1923 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1924 gcc_assert (!new_bb);
1926 else
1927 append_pattern_def_seq (stmt_vinfo, def_stmt);
1929 stype = TREE_TYPE (def);
1930 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1932 if (TREE_CODE (def) == INTEGER_CST)
1934 if (!tree_fits_uhwi_p (def)
1935 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1936 || integer_zerop (def))
1937 return NULL;
1938 def2 = build_int_cst (stype,
1939 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1941 else
1943 tree vecstype = get_vectype_for_scalar_type (stype);
1944 stmt_vec_info def_stmt_vinfo;
1946 if (vecstype == NULL_TREE)
1947 return NULL;
1948 def2 = vect_recog_temp_ssa_var (stype, NULL);
1949 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1950 if (ext_def)
1952 basic_block new_bb
1953 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1954 gcc_assert (!new_bb);
1956 else
1958 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1959 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1960 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1961 append_pattern_def_seq (stmt_vinfo, def_stmt);
1964 def2 = vect_recog_temp_ssa_var (stype, NULL);
1965 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
1966 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1967 gimple_assign_lhs (def_stmt), mask);
1968 if (ext_def)
1970 basic_block new_bb
1971 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1972 gcc_assert (!new_bb);
1974 else
1976 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1977 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1978 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1979 append_pattern_def_seq (stmt_vinfo, def_stmt);
1983 var1 = vect_recog_temp_ssa_var (type, NULL);
1984 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1985 ? LSHIFT_EXPR : RSHIFT_EXPR,
1986 oprnd0, def);
1987 append_pattern_def_seq (stmt_vinfo, def_stmt);
1989 var2 = vect_recog_temp_ssa_var (type, NULL);
1990 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1991 ? RSHIFT_EXPR : LSHIFT_EXPR,
1992 oprnd0, def2);
1993 append_pattern_def_seq (stmt_vinfo, def_stmt);
1995 /* Pattern detected. */
1996 if (dump_enabled_p ())
1997 dump_printf_loc (MSG_NOTE, vect_location,
1998 "vect_recog_rotate_pattern: detected:\n");
2000 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2001 var = vect_recog_temp_ssa_var (type, NULL);
2002 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2004 if (dump_enabled_p ())
2005 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2007 stmts->safe_push (last_stmt);
2008 return pattern_stmt;
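/* Illustrative sketch, not part of this file: the S1-S4/S0 sequence emitted
   by the rotate pattern above, written as scalar code for an unsigned 8-bit
   element type (so B == 8), assuming 0 <= c_t < 8.  The function name is
   made up for the example.  */
static inline unsigned char
example_rotate_lowering (unsigned char b_t, unsigned char c_t)
{
  unsigned char d_t = -c_t;            /* S1 d_t = -c_t;           */
  unsigned char e_t = d_t & (8 - 1);   /* S2 e_t = d_t & (B - 1);  */
  unsigned char f_t = b_t << c_t;      /* S3 f_t = b_t << c_t;     */
  unsigned char g_t = b_t >> e_t;      /* S4 g_t = b_t >> e_t;     */
  return f_t | g_t;                    /* S0 a_t = f_t | g_t;      */
}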
2011 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2012 vectorized:
2014 type a_t;
2015 TYPE b_T, res_T;
2017 S1 a_t = ;
2018 S2 b_T = ;
2019 S3 res_T = b_T op a_t;
2021 where type 'TYPE' is a type with different size than 'type',
2022 and op is <<, >> or rotate.
2024 Also detect cases:
2026 type a_t;
2027 TYPE b_T, c_T, res_T;
2029 S0 c_T = ;
2030 S1 a_t = (type) c_T;
2031 S2 b_T = ;
2032 S3 res_T = b_T op a_t;
2034 Input/Output:
2036 * STMTS: Contains a stmt from which the pattern search begins,
2037 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2038         with a shift/rotate which has the same type on both operands, in the
2039         second case just b_T op c_T, in the first case with an added cast
2040 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2042 Output:
2044 * TYPE_IN: The type of the input arguments to the pattern.
2046 * TYPE_OUT: The type of the output of this pattern.
2048 * Return value: A new stmt that will be used to replace the shift/rotate
2049 S3 stmt. */
2051 static gimple *
2052 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2053 tree *type_in, tree *type_out)
2055 gimple *last_stmt = stmts->pop ();
2056 tree oprnd0, oprnd1, lhs, var;
2057 gimple *pattern_stmt, *def_stmt;
2058 enum tree_code rhs_code;
2059 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2060 vec_info *vinfo = stmt_vinfo->vinfo;
2061 enum vect_def_type dt;
2063 if (!is_gimple_assign (last_stmt))
2064 return NULL;
2066 rhs_code = gimple_assign_rhs_code (last_stmt);
2067 switch (rhs_code)
2069 case LSHIFT_EXPR:
2070 case RSHIFT_EXPR:
2071 case LROTATE_EXPR:
2072 case RROTATE_EXPR:
2073 break;
2074 default:
2075 return NULL;
2078 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2079 return NULL;
2081 lhs = gimple_assign_lhs (last_stmt);
2082 oprnd0 = gimple_assign_rhs1 (last_stmt);
2083 oprnd1 = gimple_assign_rhs2 (last_stmt);
2084 if (TREE_CODE (oprnd0) != SSA_NAME
2085 || TREE_CODE (oprnd1) != SSA_NAME
2086 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2087 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2088 || TYPE_PRECISION (TREE_TYPE (lhs))
2089 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2090 return NULL;
2092 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2093 return NULL;
2095 if (dt != vect_internal_def)
2096 return NULL;
2098 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2099 *type_out = *type_in;
2100 if (*type_in == NULL_TREE)
2101 return NULL;
2103 tree def = NULL_TREE;
2104 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2105 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2107 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2108 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2109 && TYPE_PRECISION (TREE_TYPE (rhs1))
2110 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2112 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2113 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2114 def = rhs1;
2115 else
2117 tree mask
2118 = build_low_bits_mask (TREE_TYPE (rhs1),
2119 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2120 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2121 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2122 new_pattern_def_seq (stmt_vinfo, def_stmt);
2127 if (def == NULL_TREE)
2129 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2130 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2131 new_pattern_def_seq (stmt_vinfo, def_stmt);
2134 /* Pattern detected. */
2135 if (dump_enabled_p ())
2136 dump_printf_loc (MSG_NOTE, vect_location,
2137 "vect_recog_vector_vector_shift_pattern: detected:\n");
2139 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2140 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2141 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2143 if (dump_enabled_p ())
2144 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2146 stmts->safe_push (last_stmt);
2147 return pattern_stmt;
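/* Illustrative sketch, not part of this file: the first case described
   above, a shift whose amount has a narrower type than the shifted value,
   and the same-type form it is rewritten to.  The names are made up; the
   cast corresponds to the NOP_EXPR def stmt added to the pattern def seq.  */
static inline unsigned long long
example_vector_vector_shift (unsigned long long b_T, unsigned char a_t)
{
  /* Original stmt: res_T = b_T >> a_t; with mismatched operand types.  */
  unsigned long long def = (unsigned long long) a_t;   /* added cast      */
  return b_T >> def;                                   /* same-type shift */
}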
2150 /* Return true iff the target has a vector optab implementing the operation
2151 CODE on type VECTYPE. */
2153 static bool
2154 target_has_vecop_for_code (tree_code code, tree vectype)
2156 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2157 return voptab
2158 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2161 /* Verify that the target has optabs of VECTYPE to perform all the steps
2162 needed by the multiplication-by-immediate synthesis algorithm described by
2163 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2164 present. Return true iff the target supports all the steps. */
2166 static bool
2167 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2168 tree vectype, bool synth_shift_p)
2170 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2171 return false;
2173 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2174 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2176 if (var == negate_variant
2177 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2178 return false;
2180 /* If we must synthesize shifts with additions make sure that vector
2181 addition is available. */
2182 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2183 return false;
2185 for (int i = 1; i < alg->ops; i++)
2187 switch (alg->op[i])
2189 case alg_shift:
2190 break;
2191 case alg_add_t_m2:
2192 case alg_add_t2_m:
2193 case alg_add_factor:
2194 if (!supports_vplus)
2195 return false;
2196 break;
2197 case alg_sub_t_m2:
2198 case alg_sub_t2_m:
2199 case alg_sub_factor:
2200 if (!supports_vminus)
2201 return false;
2202 break;
2203 case alg_unknown:
2204 case alg_m:
2205 case alg_zero:
2206 case alg_impossible:
2207 return false;
2208 default:
2209 gcc_unreachable ();
2213 return true;
2216 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2217 putting the final result in DEST. Append all statements but the last into
2218 VINFO. Return the last statement. */
2220 static gimple *
2221 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2222 stmt_vec_info vinfo)
2224 HOST_WIDE_INT i;
2225 tree itype = TREE_TYPE (op);
2226 tree prev_res = op;
2227 gcc_assert (amnt >= 0);
2228 for (i = 0; i < amnt; i++)
2230 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2231 : dest;
2232 gimple *stmt
2233 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2234 prev_res = tmp_var;
2235 if (i < amnt - 1)
2236 append_pattern_def_seq (vinfo, stmt);
2237 else
2238 return stmt;
2240 gcc_unreachable ();
2241 return NULL;
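/* Illustrative sketch, not part of this file: the statements
   synth_lshift_by_additions produces for AMNT == 3, i.e. a left shift by
   three synthesized as three self-additions.  The function name is made up
   for the example.  */
static inline unsigned int
example_lshift_3_by_additions (unsigned int op)
{
  unsigned int t1 = op + op;   /* op << 1 */
  unsigned int t2 = t1 + t1;   /* op << 2 */
  return t2 + t2;              /* op << 3 */
}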
2244 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2245 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2246 the process if necessary. Append the resulting assignment statements
2247 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2248 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2249 left shifts using additions. */
2251 static tree
2252 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2253 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2255 if (integer_zerop (op2)
2256 && (code == LSHIFT_EXPR
2257 || code == PLUS_EXPR))
2259 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2260 return op1;
2263 gimple *stmt;
2264 tree itype = TREE_TYPE (op1);
2265 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2267 if (code == LSHIFT_EXPR
2268 && synth_shift_p)
2270 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2271 stmt_vinfo);
2272 append_pattern_def_seq (stmt_vinfo, stmt);
2273 return tmp_var;
2276 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2277 append_pattern_def_seq (stmt_vinfo, stmt);
2278 return tmp_var;
2281 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2282 and simple arithmetic operations to be vectorized. Record the statements
2283 produced in STMT_VINFO and return the last statement in the sequence or
2284 NULL if it's not possible to synthesize such a multiplication.
2285 This function mirrors the behavior of expand_mult_const in expmed.c but
2286 works on tree-ssa form. */
2288 static gimple *
2289 vect_synth_mult_by_constant (tree op, tree val,
2290 stmt_vec_info stmt_vinfo)
2292 tree itype = TREE_TYPE (op);
2293 machine_mode mode = TYPE_MODE (itype);
2294 struct algorithm alg;
2295 mult_variant variant;
2296 if (!tree_fits_shwi_p (val))
2297 return NULL;
2299 /* Multiplication synthesis by shifts, adds and subs can introduce
2300 signed overflow where the original operation didn't. Perform the
2301 operations on an unsigned type and cast back to avoid this.
2302 In the future we may want to relax this for synthesis algorithms
2303 that we can prove do not cause unexpected overflow. */
2304 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2306 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2308 /* Targets that don't support vector shifts but support vector additions
2309 can synthesize shifts that way. */
2310 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2312 HOST_WIDE_INT hwval = tree_to_shwi (val);
2313 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2314 The vectorizer's benefit analysis will decide whether it's beneficial
2315 to do this. */
2316 bool possible = choose_mult_variant (mode, hwval, &alg,
2317 &variant, MAX_COST);
2318 if (!possible)
2319 return NULL;
2321 tree vectype = get_vectype_for_scalar_type (multtype);
2323 if (!vectype
2324 || !target_supports_mult_synth_alg (&alg, variant,
2325 vectype, synth_shift_p))
2326 return NULL;
2328 tree accumulator;
2330 /* Clear out the sequence of statements so we can populate it below. */
2331 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2332 gimple *stmt = NULL;
2334 if (cast_to_unsigned_p)
2336 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2337 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2338 append_pattern_def_seq (stmt_vinfo, stmt);
2339 op = tmp_op;
2342 if (alg.op[0] == alg_zero)
2343 accumulator = build_int_cst (multtype, 0);
2344 else
2345 accumulator = op;
2347 bool needs_fixup = (variant == negate_variant)
2348 || (variant == add_variant);
2350 for (int i = 1; i < alg.ops; i++)
2352 tree shft_log = build_int_cst (multtype, alg.log[i]);
2353 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2354 tree tmp_var = NULL_TREE;
2356 switch (alg.op[i])
2358 case alg_shift:
2359 if (synth_shift_p)
2360 stmt
2361 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2362 stmt_vinfo);
2363 else
2364 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2365 shft_log);
2366 break;
2367 case alg_add_t_m2:
2368 tmp_var
2369 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2370 stmt_vinfo, synth_shift_p);
2371 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2372 tmp_var);
2373 break;
2374 case alg_sub_t_m2:
2375 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2376 shft_log, stmt_vinfo,
2377 synth_shift_p);
2378 /* In some algorithms the first step involves zeroing the
2379 accumulator. If subtracting from such an accumulator
2380 just emit the negation directly. */
2381 if (integer_zerop (accumulator))
2382 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2383 else
2384 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2385 tmp_var);
2386 break;
2387 case alg_add_t2_m:
2388 tmp_var
2389 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2390 stmt_vinfo, synth_shift_p);
2391 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2392 break;
2393 case alg_sub_t2_m:
2394 tmp_var
2395 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2396 stmt_vinfo, synth_shift_p);
2397 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2398 break;
2399 case alg_add_factor:
2400 tmp_var
2401 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2402 stmt_vinfo, synth_shift_p);
2403 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2404 tmp_var);
2405 break;
2406 case alg_sub_factor:
2407 tmp_var
2408 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2409 stmt_vinfo, synth_shift_p);
2410 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2411 accumulator);
2412 break;
2413 default:
2414 gcc_unreachable ();
2416 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2417 but rather return it directly. */
2419 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2420 append_pattern_def_seq (stmt_vinfo, stmt);
2421 accumulator = accum_tmp;
2423 if (variant == negate_variant)
2425 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2426 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2427 accumulator = accum_tmp;
2428 if (cast_to_unsigned_p)
2429 append_pattern_def_seq (stmt_vinfo, stmt);
2431 else if (variant == add_variant)
2433 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2434 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2435 accumulator = accum_tmp;
2436 if (cast_to_unsigned_p)
2437 append_pattern_def_seq (stmt_vinfo, stmt);
2439   /* Move back to the original signed type if needed.  */
2440 if (cast_to_unsigned_p)
2442 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2443 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2446 return stmt;
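/* Illustrative sketch, not part of this file: one possible shift-and-add
   sequence for a multiplication by the constant 10, of the kind
   vect_synth_mult_by_constant builds on tree-ssa form.  The sequence
   actually chosen depends on choose_mult_variant and the target, so this is
   only an example; the function name is made up.  */
static inline unsigned int
example_mult_by_10 (unsigned int op)
{
  unsigned int t1 = op << 2;   /* op * 4                        */
  unsigned int t2 = t1 + op;   /* op * 5  (shift-and-add step)  */
  return t2 << 1;              /* op * 10 (final shift step)    */
}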
2449 /* Detect multiplication by constant and convert it into a sequence of
2450    shifts, additions, subtractions and negations.  We reuse the
2451    choose_mult_variant algorithms from expmed.c.
2453 Input/Output:
2455 STMTS: Contains a stmt from which the pattern search begins,
2456 i.e. the mult stmt.
2458 Output:
2460 * TYPE_IN: The type of the input arguments to the pattern.
2462 * TYPE_OUT: The type of the output of this pattern.
2464 * Return value: A new stmt that will be used to replace
2465 the multiplication. */
2467 static gimple *
2468 vect_recog_mult_pattern (vec<gimple *> *stmts,
2469 tree *type_in, tree *type_out)
2471 gimple *last_stmt = stmts->pop ();
2472 tree oprnd0, oprnd1, vectype, itype;
2473 gimple *pattern_stmt;
2474 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2476 if (!is_gimple_assign (last_stmt))
2477 return NULL;
2479 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2480 return NULL;
2482 oprnd0 = gimple_assign_rhs1 (last_stmt);
2483 oprnd1 = gimple_assign_rhs2 (last_stmt);
2484 itype = TREE_TYPE (oprnd0);
2486 if (TREE_CODE (oprnd0) != SSA_NAME
2487 || TREE_CODE (oprnd1) != INTEGER_CST
2488 || !INTEGRAL_TYPE_P (itype)
2489 || !type_has_mode_precision_p (itype))
2490 return NULL;
2492 vectype = get_vectype_for_scalar_type (itype);
2493 if (vectype == NULL_TREE)
2494 return NULL;
2496 /* If the target can handle vectorized multiplication natively,
2497 don't attempt to optimize this. */
2498 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2499 if (mul_optab != unknown_optab)
2501 machine_mode vec_mode = TYPE_MODE (vectype);
2502 int icode = (int) optab_handler (mul_optab, vec_mode);
2503 if (icode != CODE_FOR_nothing)
2504 return NULL;
2507 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2508 if (!pattern_stmt)
2509 return NULL;
2511 /* Pattern detected. */
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_NOTE, vect_location,
2514 "vect_recog_mult_pattern: detected:\n");
2516 if (dump_enabled_p ())
2517 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2518                          pattern_stmt, 0);
2520 stmts->safe_push (last_stmt);
2521 *type_in = vectype;
2522 *type_out = vectype;
2524 return pattern_stmt;
2527 /* Detect a signed division by a constant that wouldn't be
2528 otherwise vectorized:
2530 type a_t, b_t;
2532 S1 a_t = b_t / N;
2534 where type 'type' is an integral type and N is a constant.
2536 Similarly handle modulo by a constant:
2538 S4 a_t = b_t % N;
2540 Input/Output:
2542 * STMTS: Contains a stmt from which the pattern search begins,
2543         i.e. the division stmt.  S1 is replaced, if N is a power of two
2544         constant and type is signed, by:
2545 S3 y_t = b_t < 0 ? N - 1 : 0;
2546 S2 x_t = b_t + y_t;
2547 S1' a_t = x_t >> log2 (N);
2549     S4 is replaced, if N is a power of two constant and type is
2550     signed, by (where *_T temporaries have unsigned type):
2551 S9 y_T = b_t < 0 ? -1U : 0U;
2552 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2553 S7 z_t = (type) z_T;
2554 S6 w_t = b_t + z_t;
2555 S5 x_t = w_t & (N - 1);
2556 S4' a_t = x_t - z_t;
2558 Output:
2560 * TYPE_IN: The type of the input arguments to the pattern.
2562 * TYPE_OUT: The type of the output of this pattern.
2564 * Return value: A new stmt that will be used to replace the division
2565 S1 or modulo S4 stmt. */
2567 static gimple *
2568 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2569 tree *type_in, tree *type_out)
2571 gimple *last_stmt = stmts->pop ();
2572 tree oprnd0, oprnd1, vectype, itype, cond;
2573 gimple *pattern_stmt, *def_stmt;
2574 enum tree_code rhs_code;
2575 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2576 vec_info *vinfo = stmt_vinfo->vinfo;
2577 optab optab;
2578 tree q;
2579 int dummy_int, prec;
2580 stmt_vec_info def_stmt_vinfo;
2582 if (!is_gimple_assign (last_stmt))
2583 return NULL;
2585 rhs_code = gimple_assign_rhs_code (last_stmt);
2586 switch (rhs_code)
2588 case TRUNC_DIV_EXPR:
2589 case TRUNC_MOD_EXPR:
2590 break;
2591 default:
2592 return NULL;
2595 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2596 return NULL;
2598 oprnd0 = gimple_assign_rhs1 (last_stmt);
2599 oprnd1 = gimple_assign_rhs2 (last_stmt);
2600 itype = TREE_TYPE (oprnd0);
2601 if (TREE_CODE (oprnd0) != SSA_NAME
2602 || TREE_CODE (oprnd1) != INTEGER_CST
2603 || TREE_CODE (itype) != INTEGER_TYPE
2604 || !type_has_mode_precision_p (itype))
2605 return NULL;
2607 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2608 vectype = get_vectype_for_scalar_type (itype);
2609 if (vectype == NULL_TREE)
2610 return NULL;
2612 /* If the target can handle vectorized division or modulo natively,
2613 don't attempt to optimize this. */
2614 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2615 if (optab != unknown_optab)
2617 machine_mode vec_mode = TYPE_MODE (vectype);
2618 int icode = (int) optab_handler (optab, vec_mode);
2619 if (icode != CODE_FOR_nothing)
2620 return NULL;
2623 prec = TYPE_PRECISION (itype);
2624 if (integer_pow2p (oprnd1))
2626 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2627 return NULL;
2629 /* Pattern detected. */
2630 if (dump_enabled_p ())
2631 dump_printf_loc (MSG_NOTE, vect_location,
2632 "vect_recog_divmod_pattern: detected:\n");
2634 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2635 build_int_cst (itype, 0));
2636 if (rhs_code == TRUNC_DIV_EXPR)
2638 tree var = vect_recog_temp_ssa_var (itype, NULL);
2639 tree shift;
2640 def_stmt
2641 = gimple_build_assign (var, COND_EXPR, cond,
2642 fold_build2 (MINUS_EXPR, itype, oprnd1,
2643 build_int_cst (itype, 1)),
2644 build_int_cst (itype, 0));
2645 new_pattern_def_seq (stmt_vinfo, def_stmt);
2646 var = vect_recog_temp_ssa_var (itype, NULL);
2647 def_stmt
2648 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2649 gimple_assign_lhs (def_stmt));
2650 append_pattern_def_seq (stmt_vinfo, def_stmt);
2652 shift = build_int_cst (itype, tree_log2 (oprnd1));
2653 pattern_stmt
2654 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2655 RSHIFT_EXPR, var, shift);
2657 else
2659 tree signmask;
2660 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2661 if (compare_tree_int (oprnd1, 2) == 0)
2663 signmask = vect_recog_temp_ssa_var (itype, NULL);
2664 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2665 build_int_cst (itype, 1),
2666 build_int_cst (itype, 0));
2667 append_pattern_def_seq (stmt_vinfo, def_stmt);
2669 else
2671 tree utype
2672 = build_nonstandard_integer_type (prec, 1);
2673 tree vecutype = get_vectype_for_scalar_type (utype);
2674 tree shift
2675 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2676 - tree_log2 (oprnd1));
2677 tree var = vect_recog_temp_ssa_var (utype, NULL);
2679 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2680 build_int_cst (utype, -1),
2681 build_int_cst (utype, 0));
2682 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2683 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2684 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2685 append_pattern_def_seq (stmt_vinfo, def_stmt);
2686 var = vect_recog_temp_ssa_var (utype, NULL);
2687 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2688 gimple_assign_lhs (def_stmt),
2689 shift);
2690 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2691 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2692 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2693 append_pattern_def_seq (stmt_vinfo, def_stmt);
2694 signmask = vect_recog_temp_ssa_var (itype, NULL);
2695 def_stmt
2696 = gimple_build_assign (signmask, NOP_EXPR, var);
2697 append_pattern_def_seq (stmt_vinfo, def_stmt);
2699 def_stmt
2700 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2701 PLUS_EXPR, oprnd0, signmask);
2702 append_pattern_def_seq (stmt_vinfo, def_stmt);
2703 def_stmt
2704 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2705 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2706 fold_build2 (MINUS_EXPR, itype, oprnd1,
2707 build_int_cst (itype, 1)));
2708 append_pattern_def_seq (stmt_vinfo, def_stmt);
2710 pattern_stmt
2711 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2712 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2713 signmask);
2716 if (dump_enabled_p ())
2717         dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2720 stmts->safe_push (last_stmt);
2722 *type_in = vectype;
2723 *type_out = vectype;
2724 return pattern_stmt;
2727 if (prec > HOST_BITS_PER_WIDE_INT
2728 || integer_zerop (oprnd1))
2729 return NULL;
2731 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2732 return NULL;
2734 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2736 if (TYPE_UNSIGNED (itype))
2738 unsigned HOST_WIDE_INT mh, ml;
2739 int pre_shift, post_shift;
2740 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2741 & GET_MODE_MASK (itype_mode));
2742 tree t1, t2, t3, t4;
2744 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2745 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2746 return NULL;
2748 /* Find a suitable multiplier and right shift count
2749 instead of multiplying with D. */
2750 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2752 /* If the suggested multiplier is more than SIZE bits, we can do better
2753 for even divisors, using an initial right shift. */
2754 if (mh != 0 && (d & 1) == 0)
2756 pre_shift = ctz_or_zero (d);
2757 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2758 &ml, &post_shift, &dummy_int);
2759 gcc_assert (!mh);
2761 else
2762 pre_shift = 0;
2764 if (mh != 0)
2766 if (post_shift - 1 >= prec)
2767 return NULL;
2769 /* t1 = oprnd0 h* ml;
2770 t2 = oprnd0 - t1;
2771 t3 = t2 >> 1;
2772 t4 = t1 + t3;
2773 q = t4 >> (post_shift - 1); */
2774 t1 = vect_recog_temp_ssa_var (itype, NULL);
2775 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2776 build_int_cst (itype, ml));
2777 append_pattern_def_seq (stmt_vinfo, def_stmt);
2779 t2 = vect_recog_temp_ssa_var (itype, NULL);
2780 def_stmt
2781 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2782 append_pattern_def_seq (stmt_vinfo, def_stmt);
2784 t3 = vect_recog_temp_ssa_var (itype, NULL);
2785 def_stmt
2786 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2787 append_pattern_def_seq (stmt_vinfo, def_stmt);
2789 t4 = vect_recog_temp_ssa_var (itype, NULL);
2790 def_stmt
2791 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2793 if (post_shift != 1)
2795 append_pattern_def_seq (stmt_vinfo, def_stmt);
2797 q = vect_recog_temp_ssa_var (itype, NULL);
2798 pattern_stmt
2799 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2800 build_int_cst (itype, post_shift - 1));
2802 else
2804 q = t4;
2805 pattern_stmt = def_stmt;
2808 else
2810 if (pre_shift >= prec || post_shift >= prec)
2811 return NULL;
2813 /* t1 = oprnd0 >> pre_shift;
2814 t2 = t1 h* ml;
2815 q = t2 >> post_shift; */
2816 if (pre_shift)
2818 t1 = vect_recog_temp_ssa_var (itype, NULL);
2819 def_stmt
2820 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2821 build_int_cst (NULL, pre_shift));
2822 append_pattern_def_seq (stmt_vinfo, def_stmt);
2824 else
2825 t1 = oprnd0;
2827 t2 = vect_recog_temp_ssa_var (itype, NULL);
2828 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2829 build_int_cst (itype, ml));
2831 if (post_shift)
2833 append_pattern_def_seq (stmt_vinfo, def_stmt);
2835 q = vect_recog_temp_ssa_var (itype, NULL);
2836 def_stmt
2837 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2838 build_int_cst (itype, post_shift));
2840 else
2841 q = t2;
2843 pattern_stmt = def_stmt;
2846 else
2848 unsigned HOST_WIDE_INT ml;
2849 int post_shift;
2850 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2851 unsigned HOST_WIDE_INT abs_d;
2852 bool add = false;
2853 tree t1, t2, t3, t4;
2855 /* Give up for -1. */
2856 if (d == -1)
2857 return NULL;
2859 /* Since d might be INT_MIN, we have to cast to
2860 unsigned HOST_WIDE_INT before negating to avoid
2861 undefined signed overflow. */
2862 abs_d = (d >= 0
2863 ? (unsigned HOST_WIDE_INT) d
2864 : - (unsigned HOST_WIDE_INT) d);
2866 /* n rem d = n rem -d */
2867 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2869 d = abs_d;
2870 oprnd1 = build_int_cst (itype, abs_d);
2872 else if (HOST_BITS_PER_WIDE_INT >= prec
2873 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2874 /* This case is not handled correctly below. */
2875 return NULL;
2877 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2878 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2880 add = true;
2881 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2883 if (post_shift >= prec)
2884 return NULL;
2886 /* t1 = oprnd0 h* ml; */
2887 t1 = vect_recog_temp_ssa_var (itype, NULL);
2888 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2889 build_int_cst (itype, ml));
2891 if (add)
2893 /* t2 = t1 + oprnd0; */
2894 append_pattern_def_seq (stmt_vinfo, def_stmt);
2895 t2 = vect_recog_temp_ssa_var (itype, NULL);
2896 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2898 else
2899 t2 = t1;
2901 if (post_shift)
2903 /* t3 = t2 >> post_shift; */
2904 append_pattern_def_seq (stmt_vinfo, def_stmt);
2905 t3 = vect_recog_temp_ssa_var (itype, NULL);
2906 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2907 build_int_cst (itype, post_shift));
2909 else
2910 t3 = t2;
2912 wide_int oprnd0_min, oprnd0_max;
2913 int msb = 1;
2914 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2916 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2917 msb = 0;
2918 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2919 msb = -1;
2922 if (msb == 0 && d >= 0)
2924 /* q = t3; */
2925 q = t3;
2926 pattern_stmt = def_stmt;
2928 else
2930 /* t4 = oprnd0 >> (prec - 1);
2931 or if we know from VRP that oprnd0 >= 0
2932 t4 = 0;
2933 or if we know from VRP that oprnd0 < 0
2934 t4 = -1; */
2935 append_pattern_def_seq (stmt_vinfo, def_stmt);
2936 t4 = vect_recog_temp_ssa_var (itype, NULL);
2937 if (msb != 1)
2938 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2939 build_int_cst (itype, msb));
2940 else
2941 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2942 build_int_cst (itype, prec - 1));
2943 append_pattern_def_seq (stmt_vinfo, def_stmt);
2945 /* q = t3 - t4; or q = t4 - t3; */
2946 q = vect_recog_temp_ssa_var (itype, NULL);
2947 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2948 d < 0 ? t3 : t4);
2952 if (rhs_code == TRUNC_MOD_EXPR)
2954 tree r, t1;
2956 /* We divided. Now finish by:
2957 t1 = q * oprnd1;
2958 r = oprnd0 - t1; */
2959 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2961 t1 = vect_recog_temp_ssa_var (itype, NULL);
2962 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2963 append_pattern_def_seq (stmt_vinfo, def_stmt);
2965 r = vect_recog_temp_ssa_var (itype, NULL);
2966 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2969 /* Pattern detected. */
2970 if (dump_enabled_p ())
2972 dump_printf_loc (MSG_NOTE, vect_location,
2973 "vect_recog_divmod_pattern: detected: ");
2974 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2977 stmts->safe_push (last_stmt);
2979 *type_in = vectype;
2980 *type_out = vectype;
2981 return pattern_stmt;
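/* Illustrative sketches, not part of this file.  The first shows the
   power-of-two path above as scalar code for a signed division by 4
   (S3/S2/S1' in the comment before the function), assuming an arithmetic
   right shift for the signed type.  The second shows the multiply-highpart
   path for an unsigned 32-bit division by 3, using the usual magic
   multiplier 0xAAAAAAABu with post_shift == 1.  Function names are made up
   for the example.  */
static inline int
example_sdiv_by_4 (int b_t)
{
  int y_t = b_t < 0 ? 4 - 1 : 0;   /* S3  y_t = b_t < 0 ? N - 1 : 0; */
  int x_t = b_t + y_t;             /* S2  x_t = b_t + y_t;           */
  return x_t >> 2;                 /* S1' a_t = x_t >> log2 (N);     */
}

static inline unsigned int
example_udiv_by_3 (unsigned int n)
{
  /* t1 = n h* ml; i.e. the high 32 bits of the 64-bit product.  */
  unsigned int t1
    = (unsigned int) (((unsigned long long) n * 0xAAAAAAABu) >> 32);
  return t1 >> 1;                  /* q = t1 >> post_shift; */
}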
2984 /* Function vect_recog_mixed_size_cond_pattern
2986 Try to find the following pattern:
2988 type x_t, y_t;
2989 TYPE a_T, b_T, c_T;
2990 loop:
2991 S1 a_T = x_t CMP y_t ? b_T : c_T;
2993 where type 'TYPE' is an integral type which has different size
2994 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2995 than 'type', the constants need to fit into an integer type
2996 with the same width as 'type') or results of conversion from 'type'.
2998 Input:
3000 * LAST_STMT: A stmt from which the pattern search begins.
3002 Output:
3004 * TYPE_IN: The type of the input arguments to the pattern.
3006 * TYPE_OUT: The type of the output of this pattern.
3008 * Return value: A new stmt that will be used to replace the pattern.
3009 Additionally a def_stmt is added.
3011 a_it = x_t CMP y_t ? b_it : c_it;
3012 a_T = (TYPE) a_it; */
3014 static gimple *
3015 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3016 tree *type_out)
3018 gimple *last_stmt = (*stmts)[0];
3019 tree cond_expr, then_clause, else_clause;
3020 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3021 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3022 gimple *pattern_stmt, *def_stmt;
3023 vec_info *vinfo = stmt_vinfo->vinfo;
3024 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3025 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3026 bool promotion;
3027 tree comp_scalar_type;
3029 if (!is_gimple_assign (last_stmt)
3030 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3031 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3032 return NULL;
3034 cond_expr = gimple_assign_rhs1 (last_stmt);
3035 then_clause = gimple_assign_rhs2 (last_stmt);
3036 else_clause = gimple_assign_rhs3 (last_stmt);
3038 if (!COMPARISON_CLASS_P (cond_expr))
3039 return NULL;
3041 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3042 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3043 if (comp_vectype == NULL_TREE)
3044 return NULL;
3046 type = gimple_expr_type (last_stmt);
3047 if (types_compatible_p (type, comp_scalar_type)
3048 || ((TREE_CODE (then_clause) != INTEGER_CST
3049 || TREE_CODE (else_clause) != INTEGER_CST)
3050 && !INTEGRAL_TYPE_P (comp_scalar_type))
3051 || !INTEGRAL_TYPE_P (type))
3052 return NULL;
3054 if ((TREE_CODE (then_clause) != INTEGER_CST
3055 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3056 &def_stmt0, &promotion))
3057 || (TREE_CODE (else_clause) != INTEGER_CST
3058 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3059 &def_stmt1, &promotion)))
3060 return NULL;
3062 if (orig_type0 && orig_type1
3063 && !types_compatible_p (orig_type0, orig_type1))
3064 return NULL;
3066 if (orig_type0)
3068 if (!types_compatible_p (orig_type0, comp_scalar_type))
3069 return NULL;
3070 then_clause = gimple_assign_rhs1 (def_stmt0);
3071 itype = orig_type0;
3074 if (orig_type1)
3076 if (!types_compatible_p (orig_type1, comp_scalar_type))
3077 return NULL;
3078 else_clause = gimple_assign_rhs1 (def_stmt1);
3079 itype = orig_type1;
3083 HOST_WIDE_INT cmp_mode_size
3084 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3086 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3087 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3088 return NULL;
3090 vectype = get_vectype_for_scalar_type (type);
3091 if (vectype == NULL_TREE)
3092 return NULL;
3094 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3095 return NULL;
3097 if (itype == NULL_TREE)
3098 itype = build_nonstandard_integer_type (cmp_mode_size,
3099 TYPE_UNSIGNED (type));
3101 if (itype == NULL_TREE
3102 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3103 return NULL;
3105 vecitype = get_vectype_for_scalar_type (itype);
3106 if (vecitype == NULL_TREE)
3107 return NULL;
3109 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3110 return NULL;
3112 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3114 if ((TREE_CODE (then_clause) == INTEGER_CST
3115 && !int_fits_type_p (then_clause, itype))
3116 || (TREE_CODE (else_clause) == INTEGER_CST
3117 && !int_fits_type_p (else_clause, itype)))
3118 return NULL;
3121 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3122 COND_EXPR, unshare_expr (cond_expr),
3123 fold_convert (itype, then_clause),
3124 fold_convert (itype, else_clause));
3125 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3126 NOP_EXPR, gimple_assign_lhs (def_stmt));
3128 new_pattern_def_seq (stmt_vinfo, def_stmt);
3129 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3130 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3131 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3132 *type_in = vecitype;
3133 *type_out = vectype;
3135 if (dump_enabled_p ())
3136 dump_printf_loc (MSG_NOTE, vect_location,
3137 "vect_recog_mixed_size_cond_pattern: detected:\n");
3139 return pattern_stmt;
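/* Illustrative sketch, not part of this file: the rewrite described above
   for a mixed-size COND_EXPR -- the selection is performed in an integer
   type of the comparison's width (a_it) and only the result is widened.
   The names and the constants 5 and 17 are made up for the example.  */
static inline long long
example_mixed_size_cond (int x_t, int y_t)
{
  int a_it = x_t < y_t ? 5 : 17;   /* a_it = x_t CMP y_t ? b_it : c_it; */
  return (long long) a_it;         /* a_T = (TYPE) a_it;                */
}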
3143 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3144 true if bool VAR can and should be optimized that way. Assume it shouldn't
3145 in case it's a result of a comparison which can be directly vectorized into
3146 a vector comparison. Fills in STMTS with all stmts visited during the
3147 walk. */
3149 static bool
3150 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3152 gimple *def_stmt;
3153 enum vect_def_type dt;
3154 tree rhs1;
3155 enum tree_code rhs_code;
3157 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3158 return false;
3160 if (dt != vect_internal_def)
3161 return false;
3163 if (!is_gimple_assign (def_stmt))
3164 return false;
3166 if (stmts.contains (def_stmt))
3167 return true;
3169 rhs1 = gimple_assign_rhs1 (def_stmt);
3170 rhs_code = gimple_assign_rhs_code (def_stmt);
3171 switch (rhs_code)
3173 case SSA_NAME:
3174 if (! check_bool_pattern (rhs1, vinfo, stmts))
3175 return false;
3176 break;
3178 CASE_CONVERT:
3179 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3180 return false;
3181 if (! check_bool_pattern (rhs1, vinfo, stmts))
3182 return false;
3183 break;
3185 case BIT_NOT_EXPR:
3186 if (! check_bool_pattern (rhs1, vinfo, stmts))
3187 return false;
3188 break;
3190 case BIT_AND_EXPR:
3191 case BIT_IOR_EXPR:
3192 case BIT_XOR_EXPR:
3193 if (! check_bool_pattern (rhs1, vinfo, stmts)
3194 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3195 return false;
3196 break;
3198 default:
3199 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3201 tree vecitype, comp_vectype;
3203 /* If the comparison can throw, then is_gimple_condexpr will be
3204 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3205 if (stmt_could_throw_p (def_stmt))
3206 return false;
3208 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3209 if (comp_vectype == NULL_TREE)
3210 return false;
3212 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3213 if (mask_type
3214 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3215 return false;
3217 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3219 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3220 tree itype
3221 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3222 vecitype = get_vectype_for_scalar_type (itype);
3223 if (vecitype == NULL_TREE)
3224 return false;
3226 else
3227 vecitype = comp_vectype;
3228 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3229 return false;
3231 else
3232 return false;
3233 break;
3236 bool res = stmts.add (def_stmt);
3237 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3238 gcc_assert (!res);
3240 return true;
3244 /* Helper function of adjust_bool_pattern.  Cast VAR (the result of a
3245    previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, adding the cast stmt
3246    to STMT_INFO's pattern sequence, and return the result of the cast.  */
3248 static tree
3249 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3251 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3252 NOP_EXPR, var);
3253 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3254 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3255 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3256 append_pattern_def_seq (stmt_info, cast_stmt);
3257 return gimple_assign_lhs (cast_stmt);
3260 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3261 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3262 type, OUT_TYPE is the desired final integer type of the whole pattern.
3263 STMT_INFO is the info of the pattern root and is where pattern stmts should
3264 be associated with. DEFS is a map of pattern defs. */
3266 static void
3267 adjust_bool_pattern (tree var, tree out_type,
3268 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3270 gimple *stmt = SSA_NAME_DEF_STMT (var);
3271 enum tree_code rhs_code, def_rhs_code;
3272 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3273 location_t loc;
3274 gimple *pattern_stmt, *def_stmt;
3275 tree trueval = NULL_TREE;
3277 rhs1 = gimple_assign_rhs1 (stmt);
3278 rhs2 = gimple_assign_rhs2 (stmt);
3279 rhs_code = gimple_assign_rhs_code (stmt);
3280 loc = gimple_location (stmt);
3281 switch (rhs_code)
3283 case SSA_NAME:
3284 CASE_CONVERT:
3285 irhs1 = *defs.get (rhs1);
3286 itype = TREE_TYPE (irhs1);
3287 pattern_stmt
3288 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3289 SSA_NAME, irhs1);
3290 break;
3292 case BIT_NOT_EXPR:
3293 irhs1 = *defs.get (rhs1);
3294 itype = TREE_TYPE (irhs1);
3295 pattern_stmt
3296 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3297 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3298 break;
3300 case BIT_AND_EXPR:
3301 /* Try to optimize x = y & (a < b ? 1 : 0); into
3302 x = (a < b ? y : 0);
3304 E.g. for:
3305 bool a_b, b_b, c_b;
3306 TYPE d_T;
3308 S1 a_b = x1 CMP1 y1;
3309 S2 b_b = x2 CMP2 y2;
3310 S3 c_b = a_b & b_b;
3311 S4 d_T = (TYPE) c_b;
3313 we would normally emit:
3315 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3316 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3317 S3' c_T = a_T & b_T;
3318 S4' d_T = c_T;
3320 but we can save one stmt by using the
3321 result of one of the COND_EXPRs in the other COND_EXPR and leave
3322         the BIT_AND_EXPR stmt out:
3324 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3325 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3326 S4' f_T = c_T;
3328 At least when VEC_COND_EXPR is implemented using masks
3329         cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3330         computes the comparison mask and ANDs it, in one case with an
3331         all-ones vector, in the other case with a vector register.
3332 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3333 often more expensive. */
3334 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3335 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3336 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3338 irhs1 = *defs.get (rhs1);
3339 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3340 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3341 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3343 rhs_code = def_rhs_code;
3344 rhs1 = def_rhs1;
3345 rhs2 = gimple_assign_rhs2 (def_stmt);
3346 trueval = irhs1;
3347 goto do_compare;
3349 else
3350 irhs2 = *defs.get (rhs2);
3351 goto and_ior_xor;
3353 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3354 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3355 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3357 irhs2 = *defs.get (rhs2);
3358 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3359 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3360 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3362 rhs_code = def_rhs_code;
3363 rhs1 = def_rhs1;
3364 rhs2 = gimple_assign_rhs2 (def_stmt);
3365 trueval = irhs2;
3366 goto do_compare;
3368 else
3369 irhs1 = *defs.get (rhs1);
3370 goto and_ior_xor;
3372 /* FALLTHRU */
3373 case BIT_IOR_EXPR:
3374 case BIT_XOR_EXPR:
3375 irhs1 = *defs.get (rhs1);
3376 irhs2 = *defs.get (rhs2);
3377 and_ior_xor:
3378 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3379 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3381 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3382 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3383 int out_prec = TYPE_PRECISION (out_type);
3384 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3385 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3386 stmt_info);
3387 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3388 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3389 stmt_info);
3390 else
3392 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3393 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3396 itype = TREE_TYPE (irhs1);
3397 pattern_stmt
3398 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3399 rhs_code, irhs1, irhs2);
3400 break;
3402 default:
3403 do_compare:
3404 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3405 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3406 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3407 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3408 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3410 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3411 itype
3412 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3414 else
3415 itype = TREE_TYPE (rhs1);
3416 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3417 if (trueval == NULL_TREE)
3418 trueval = build_int_cst (itype, 1);
3419 else
3420 gcc_checking_assert (useless_type_conversion_p (itype,
3421 TREE_TYPE (trueval)));
3422 pattern_stmt
3423 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3424 COND_EXPR, cond_expr, trueval,
3425 build_int_cst (itype, 0));
3426 break;
3429 gimple_set_location (pattern_stmt, loc);
3430 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3431 pattern def seq stmts instead of just letting auto-detection do
3432 its work? */
3433 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3434 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3435 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3436 append_pattern_def_seq (stmt_info, pattern_stmt);
3437 defs.put (var, gimple_assign_lhs (pattern_stmt));
3440 /* Comparison function to qsort a vector of gimple stmts by UID.  */
3442 static int
3443 sort_after_uid (const void *p1, const void *p2)
3445 const gimple *stmt1 = *(const gimple * const *)p1;
3446 const gimple *stmt2 = *(const gimple * const *)p2;
3447 return gimple_uid (stmt1) - gimple_uid (stmt2);
3450 /* Create pattern stmts for all stmts participating in the bool pattern
3451 specified by BOOL_STMT_SET and its root STMT with the desired type
3452 OUT_TYPE. Return the def of the pattern root. */
3454 static tree
3455 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3456 tree out_type, gimple *stmt)
3458 /* Gather original stmts in the bool pattern in their order of appearance
3459 in the IL. */
3460 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3461 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3462 i != bool_stmt_set.end (); ++i)
3463 bool_stmts.quick_push (*i);
3464 bool_stmts.qsort (sort_after_uid);
3466 /* Now process them in that order, producing pattern stmts. */
3467 hash_map <tree, tree> defs;
3468 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3469 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3470 out_type, vinfo_for_stmt (stmt), defs);
3472 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3473 gimple *pattern_stmt
3474 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3475 return gimple_assign_lhs (pattern_stmt);
3478 /* Helper for search_type_for_mask. */
3480 static tree
3481 search_type_for_mask_1 (tree var, vec_info *vinfo,
3482 hash_map<gimple *, tree> &cache)
3484 gimple *def_stmt;
3485 enum vect_def_type dt;
3486 tree rhs1;
3487 enum tree_code rhs_code;
3488 tree res = NULL_TREE, res2;
3490 if (TREE_CODE (var) != SSA_NAME)
3491 return NULL_TREE;
3493 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3494 return NULL_TREE;
3496 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3497 return NULL_TREE;
3499 if (dt != vect_internal_def)
3500 return NULL_TREE;
3502 if (!is_gimple_assign (def_stmt))
3503 return NULL_TREE;
3505 tree *c = cache.get (def_stmt);
3506 if (c)
3507 return *c;
3509 rhs_code = gimple_assign_rhs_code (def_stmt);
3510 rhs1 = gimple_assign_rhs1 (def_stmt);
3512 switch (rhs_code)
3514 case SSA_NAME:
3515 case BIT_NOT_EXPR:
3516 CASE_CONVERT:
3517 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3518 break;
3520 case BIT_AND_EXPR:
3521 case BIT_IOR_EXPR:
3522 case BIT_XOR_EXPR:
3523 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3524 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3525 cache);
3526 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3527 res = res2;
3528 break;
3530 default:
3531 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3533 tree comp_vectype, mask_type;
3535 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3537 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3538 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3539 vinfo, cache);
3540 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3541 res = res2;
3542 break;
3545 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3546 if (comp_vectype == NULL_TREE)
3548 res = NULL_TREE;
3549 break;
3552 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3553 if (!mask_type
3554 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3556 res = NULL_TREE;
3557 break;
3560 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3561 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3563 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3564 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3566 else
3567 res = TREE_TYPE (rhs1);
3571 cache.put (def_stmt, res);
3572 return res;
3575 /* Return the proper type for converting bool VAR into
3576 an integer value or NULL_TREE if no such type exists.
3577    The type is chosen so that the converted value has the
3578 same number of elements as VAR's vector type. */
3580 static tree
3581 search_type_for_mask (tree var, vec_info *vinfo)
3583 hash_map<gimple *, tree> cache;
3584 return search_type_for_mask_1 (var, vinfo, cache);
3587 /* Function vect_recog_bool_pattern
3589 Try to find pattern like following:
3591 bool a_b, b_b, c_b, d_b, e_b;
3592 TYPE f_T;
3593 loop:
3594 S1 a_b = x1 CMP1 y1;
3595 S2 b_b = x2 CMP2 y2;
3596 S3 c_b = a_b & b_b;
3597 S4 d_b = x3 CMP3 y3;
3598 S5 e_b = c_b | d_b;
3599 S6 f_T = (TYPE) e_b;
3601 where type 'TYPE' is an integral type. Or a similar pattern
3602 ending in
3604 S6 f_Y = e_b ? r_Y : s_Y;
3606 as results from if-conversion of a complex condition.
3608 Input:
3610 * LAST_STMT: A stmt at the end from which the pattern
3611 search begins, i.e. cast of a bool to
3612 an integer type.
3614 Output:
3616 * TYPE_IN: The type of the input arguments to the pattern.
3618 * TYPE_OUT: The type of the output of this pattern.
3620 * Return value: A new stmt that will be used to replace the pattern.
3622    Assuming the size of TYPE is the same as the size of all comparisons
3623    (otherwise some casts would be added where needed), for the above
3624    sequence we create related pattern stmts:
3625 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3626 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3627 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3628 S5' e_T = c_T | d_T;
3629 S6' f_T = e_T;
3631 Instead of the above S3' we could emit:
3632 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3633 S3' c_T = a_T | b_T;
3634 but the above is more efficient. */
3636 static gimple *
3637 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3638 tree *type_out)
3640 gimple *last_stmt = stmts->pop ();
3641 enum tree_code rhs_code;
3642 tree var, lhs, rhs, vectype;
3643 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3644 stmt_vec_info new_stmt_info;
3645 vec_info *vinfo = stmt_vinfo->vinfo;
3646 gimple *pattern_stmt;
3648 if (!is_gimple_assign (last_stmt))
3649 return NULL;
3651 var = gimple_assign_rhs1 (last_stmt);
3652 lhs = gimple_assign_lhs (last_stmt);
3654 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3655 return NULL;
3657 hash_set<gimple *> bool_stmts;
3659 rhs_code = gimple_assign_rhs_code (last_stmt);
3660 if (CONVERT_EXPR_CODE_P (rhs_code))
3662 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3663 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3664 return NULL;
3665 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3666 if (vectype == NULL_TREE)
3667 return NULL;
3669 if (check_bool_pattern (var, vinfo, bool_stmts))
3671 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3672 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3673 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3674 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3675 else
3676 pattern_stmt
3677 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3679 else
3681 tree type = search_type_for_mask (var, vinfo);
3682 tree cst0, cst1, tmp;
3684 if (!type)
3685 return NULL;
3687       /* We may directly use the cond with a narrowed type to avoid
3688          multiple cond exprs with following result packing, and
3689          instead perform a single cond with a packed mask.  In case
3690          of widening we had better make the cond first and then extract
3691          the results.  */
3692 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3693 type = TREE_TYPE (lhs);
3695 cst0 = build_int_cst (type, 0);
3696 cst1 = build_int_cst (type, 1);
3697 tmp = vect_recog_temp_ssa_var (type, NULL);
3698 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3700 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3702 tree new_vectype = get_vectype_for_scalar_type (type);
3703 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3704 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3705 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3706 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3708 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3709 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3713 *type_out = vectype;
3714 *type_in = vectype;
3715 stmts->safe_push (last_stmt);
3716 if (dump_enabled_p ())
3717 dump_printf_loc (MSG_NOTE, vect_location,
3718 "vect_recog_bool_pattern: detected:\n");
3720 return pattern_stmt;
3722 else if (rhs_code == COND_EXPR
3723 && TREE_CODE (var) == SSA_NAME)
3725 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3726 if (vectype == NULL_TREE)
3727 return NULL;
3729 /* Build a scalar type for the boolean result that when
3730 vectorized matches the vector type of the result in
3731 size and number of elements. */
3732 unsigned prec
3733 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3734 TYPE_VECTOR_SUBPARTS (vectype));
3736 tree type
3737 = build_nonstandard_integer_type (prec,
3738 TYPE_UNSIGNED (TREE_TYPE (var)));
3739 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3740 return NULL;
3742 if (!check_bool_pattern (var, vinfo, bool_stmts))
3743 return NULL;
3745 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3747 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3748 pattern_stmt
3749 = gimple_build_assign (lhs, COND_EXPR,
3750 build2 (NE_EXPR, boolean_type_node,
3751 rhs, build_int_cst (type, 0)),
3752 gimple_assign_rhs2 (last_stmt),
3753 gimple_assign_rhs3 (last_stmt));
3754 *type_out = vectype;
3755 *type_in = vectype;
3756 stmts->safe_push (last_stmt);
3757 if (dump_enabled_p ())
3758 dump_printf_loc (MSG_NOTE, vect_location,
3759 "vect_recog_bool_pattern: detected:\n");
3761 return pattern_stmt;
3763 else if (rhs_code == SSA_NAME
3764 && STMT_VINFO_DATA_REF (stmt_vinfo))
3766 stmt_vec_info pattern_stmt_info;
3767 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3768 gcc_assert (vectype != NULL_TREE);
3769 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3770 return NULL;
3772 if (check_bool_pattern (var, vinfo, bool_stmts))
3773 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3774 else
3776 tree type = search_type_for_mask (var, vinfo);
3777 tree cst0, cst1, new_vectype;
3779 if (!type)
3780 return NULL;
3782 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3783 type = TREE_TYPE (vectype);
3785 cst0 = build_int_cst (type, 0);
3786 cst1 = build_int_cst (type, 1);
3787 new_vectype = get_vectype_for_scalar_type (type);
3789 rhs = vect_recog_temp_ssa_var (type, NULL);
3790 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3792 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3793 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3794 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3795 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3798 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3799 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3801 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3802 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3803 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3804 rhs = rhs2;
3806 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3807 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3808 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3809 STMT_VINFO_DATA_REF (pattern_stmt_info)
3810 = STMT_VINFO_DATA_REF (stmt_vinfo);
3811 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3812 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3813 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3814 *type_out = vectype;
3815 *type_in = vectype;
3816 stmts->safe_push (last_stmt);
3817 if (dump_enabled_p ())
3818 dump_printf_loc (MSG_NOTE, vect_location,
3819 "vect_recog_bool_pattern: detected:\n");
3820 return pattern_stmt;
3822 else
3823 return NULL;
3827 /* A helper for vect_recog_mask_conversion_pattern. Build
3828 conversion of MASK to a type suitable for masking VECTYPE.
3829 The built statement gets the required vectype and is appended
3830 to the pattern definition sequence of STMT_VINFO.
3832 Return converted mask. */
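/* Minimal sketch of what this helper emits (illustrative names): given a
   mask M whose type does not match the number of elements of VECTYPE, it
   appends a pattern statement of the form

     tmp = (boolean-element-type) M;

   where the boolean element type comes from the truth vector type built
   for VECTYPE, and that truth vector type is recorded as the statement's
   vectype.  */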
3834 static tree
3835 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3836 vec_info *vinfo)
3838 gimple *stmt;
3839 tree masktype, tmp;
3840 stmt_vec_info new_stmt_info;
3842 masktype = build_same_sized_truth_vector_type (vectype);
3843 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3844 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3845 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3846 set_vinfo_for_stmt (stmt, new_stmt_info);
3847 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3848 append_pattern_def_seq (stmt_vinfo, stmt);
3850 return tmp;
3854 /* Function vect_recog_mask_conversion_pattern
3856 Try to find statements which require boolean type
3857 conversion. Additional conversion statements are
3858 added to handle such cases. For example:
3860 bool m_1, m_2, m_3;
3861 int i_4, i_5;
3862 double d_6, d_7;
3863 char c_1, c_2, c_3;
3865 S1 m_1 = i_4 > i_5;
3866 S2 m_2 = d_6 < d_7;
3867 S3 m_3 = m_1 & m_2;
3868 S4 c_1 = m_3 ? c_2 : c_3;
3870 Will be transformed into:
3872 S1 m_1 = i_4 > i_5;
3873 S2 m_2 = d_6 < d_7;
3874 S3'' m_2' = (_Bool[bitsize=32])m_2
3875 S3' m_3' = m_1 & m_2';
3876 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3877 S4' c_1' = m_3'' ? c_2 : c_3; */
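/* At the source level this corresponds (illustratively, with hypothetical
   names) to something like

     _Bool m1 = i > j;      (int comparison)
     _Bool m2 = d < e;      (double comparison)
     c = (m1 & m2) ? c2 : c3;

   where the int and double comparisons naturally produce masks of
   different widths, which the inserted conversions S3'' and S4''
   reconcile.  */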
3879 static gimple *
3880 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3881 tree *type_out)
3883 gimple *last_stmt = stmts->pop ();
3884 enum tree_code rhs_code;
3885 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3886 tree vectype1, vectype2;
3887 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3888 stmt_vec_info pattern_stmt_info;
3889 vec_info *vinfo = stmt_vinfo->vinfo;
3891 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3892 if (is_gimple_call (last_stmt)
3893 && gimple_call_internal_p (last_stmt)
3894 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3895 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3897 gcall *pattern_stmt;
3898 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3900 if (load)
3902 lhs = gimple_call_lhs (last_stmt);
3903 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3905 else
3907 rhs2 = gimple_call_arg (last_stmt, 3);
3908 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3911 rhs1 = gimple_call_arg (last_stmt, 2);
3912 rhs1_type = search_type_for_mask (rhs1, vinfo);
3913 if (!rhs1_type)
3914 return NULL;
3915 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3917 if (!vectype1 || !vectype2
3918 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3919 TYPE_VECTOR_SUBPARTS (vectype2)))
3920 return NULL;
3922 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
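/* Rebuild the masked load/store call below with the converted mask
   replacing the original mask argument.  */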
3924 if (load)
3926 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3927 pattern_stmt
3928 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3929 gimple_call_arg (last_stmt, 0),
3930 gimple_call_arg (last_stmt, 1),
3931 tmp);
3932 gimple_call_set_lhs (pattern_stmt, lhs);
3934 else
3935 pattern_stmt
3936 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3937 gimple_call_arg (last_stmt, 0),
3938 gimple_call_arg (last_stmt, 1),
3939 tmp,
3940 gimple_call_arg (last_stmt, 3));
3942 gimple_call_set_nothrow (pattern_stmt, true);
3944 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3945 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3946 STMT_VINFO_DATA_REF (pattern_stmt_info)
3947 = STMT_VINFO_DATA_REF (stmt_vinfo);
3948 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3949 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3950 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3952 *type_out = vectype1;
3953 *type_in = vectype1;
3954 stmts->safe_push (last_stmt);
3955 if (dump_enabled_p ())
3956 dump_printf_loc (MSG_NOTE, vect_location,
3957 "vect_recog_mask_conversion_pattern: detected:\n");
3959 return pattern_stmt;
3962 if (!is_gimple_assign (last_stmt))
3963 return NULL;
3965 gimple *pattern_stmt;
3966 lhs = gimple_assign_lhs (last_stmt);
3967 rhs1 = gimple_assign_rhs1 (last_stmt);
3968 rhs_code = gimple_assign_rhs_code (last_stmt);
3970 /* Check for cond expression requiring mask conversion. */
3971 if (rhs_code == COND_EXPR)
3973 /* vect_recog_mixed_size_cond_pattern could apply.
3974 Do nothing then. */
3975 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3976 return NULL;
3978 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3980 if (TREE_CODE (rhs1) == SSA_NAME)
3982 rhs1_type = search_type_for_mask (rhs1, vinfo);
3983 if (!rhs1_type)
3984 return NULL;
3986 else if (COMPARISON_CLASS_P (rhs1))
3988 /* Check whether we're comparing scalar booleans and (if so)
3989 whether a better mask type exists than the mask associated
3990 with boolean-sized elements. This avoids unnecessary packs
3991 and unpacks if the booleans are set from comparisons of
3992 wider types. E.g. in:
3994 int x1, x2, x3, x4, y1, y2;
3996 bool b1 = (x1 == x2);
3997 bool b2 = (x3 == x4);
3998 ... = b1 == b2 ? y1 : y2;
4000 it is better for b1 and b2 to use the mask type associated
4001 with int elements rather than bool (byte) elements. */
4002 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4003 if (!rhs1_type)
4004 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4006 else
4007 return NULL;
4009 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4011 if (!vectype1 || !vectype2)
4012 return NULL;
4014 /* Continue if a conversion is needed. Also continue if we have
4015 a comparison whose vector type would normally be different from
4016 VECTYPE2 when considered in isolation. In that case we'll
4017 replace the comparison with an SSA name (so that we can record
4018 its vector type) and behave as though the comparison was an SSA
4019 name from the outset. */
4020 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4021 TYPE_VECTOR_SUBPARTS (vectype2))
4022 && (TREE_CODE (rhs1) == SSA_NAME
4023 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4024 return NULL;
4026 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4027 in place; we can handle it in vectorizable_condition. This avoids
4028 unnecessary promotion stmts and an increased vectorization factor. */
4029 if (COMPARISON_CLASS_P (rhs1)
4030 && INTEGRAL_TYPE_P (rhs1_type)
4031 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4032 TYPE_VECTOR_SUBPARTS (vectype2)))
4034 gimple *dummy;
4035 enum vect_def_type dt;
4036 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4037 &dummy, &dt)
4038 && dt == vect_external_def
4039 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4040 &dummy, &dt)
4041 && (dt == vect_external_def
4042 || dt == vect_constant_def))
4044 tree wide_scalar_type = build_nonstandard_integer_type
4045 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4046 TYPE_UNSIGNED (rhs1_type));
4047 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4048 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4049 return NULL;
4053 /* If rhs1 is a comparison we need to move it into a
4054 separate statement. */
4055 if (TREE_CODE (rhs1) != SSA_NAME)
4057 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4058 pattern_stmt = gimple_build_assign (tmp, rhs1);
4059 rhs1 = tmp;
4061 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4062 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4063 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4064 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4067 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4068 TYPE_VECTOR_SUBPARTS (vectype2)))
4069 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4070 else
4071 tmp = rhs1;
4073 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4074 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4075 gimple_assign_rhs2 (last_stmt),
4076 gimple_assign_rhs3 (last_stmt));
4078 *type_out = vectype1;
4079 *type_in = vectype1;
4080 stmts->safe_push (last_stmt);
4081 if (dump_enabled_p ())
4082 dump_printf_loc (MSG_NOTE, vect_location,
4083 "vect_recog_mask_conversion_pattern: detected:\n");
4085 return pattern_stmt;
4088 /* Now check for binary boolean operations requiring conversion for
4089 one of operands. */
4090 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4091 return NULL;
4093 if (rhs_code != BIT_IOR_EXPR
4094 && rhs_code != BIT_XOR_EXPR
4095 && rhs_code != BIT_AND_EXPR
4096 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4097 return NULL;
4099 rhs2 = gimple_assign_rhs2 (last_stmt);
4101 rhs1_type = search_type_for_mask (rhs1, vinfo);
4102 rhs2_type = search_type_for_mask (rhs2, vinfo);
4104 if (!rhs1_type || !rhs2_type
4105 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4106 return NULL;
4108 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4110 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4111 if (!vectype1)
4112 return NULL;
4113 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4115 else
4117 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4118 if (!vectype1)
4119 return NULL;
4120 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4123 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4124 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4126 *type_out = vectype1;
4127 *type_in = vectype1;
4128 stmts->safe_push (last_stmt);
4129 if (dump_enabled_p ())
4130 dump_printf_loc (MSG_NOTE, vect_location,
4131 "vect_recog_mask_conversion_pattern: detected:\n");
4133 return pattern_stmt;
4136 /* STMT is a load or store. If the load or store is conditional, return
4137 the boolean condition under which it occurs, otherwise return null. */
4139 static tree
4140 vect_get_load_store_mask (gimple *stmt)
4142 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4144 gcc_assert (gimple_assign_single_p (def_assign));
4145 return NULL_TREE;
4148 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4150 internal_fn ifn = gimple_call_internal_fn (def_call);
4151 int mask_index = internal_fn_mask_index (ifn);
4152 return gimple_call_arg (def_call, mask_index);
4155 gcc_unreachable ();
4158 /* Return the scalar offset type that an internal gather/scatter function
4159 should use. GS_INFO describes the gather/scatter operation. */
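/* E.g. (illustrative): a 32-bit offset used to index 64-bit elements is
   widened to a 64-bit integer type of the offset's signedness, while an
   offset that is already as wide as the elements is returned unchanged.  */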
4161 static tree
4162 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4164 tree offset_type = TREE_TYPE (gs_info->offset);
4165 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4167 /* Enforced by vect_check_gather_scatter. */
4168 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4169 gcc_assert (element_bits >= offset_bits);
4171 /* If the offset is narrower than the elements, extend it according
4172 to its sign. */
4173 if (element_bits > offset_bits)
4174 return build_nonstandard_integer_type (element_bits,
4175 TYPE_UNSIGNED (offset_type));
4177 return offset_type;
4180 /* Return MASK if MASK is suitable for masking an operation on vectors
4181 of type VECTYPE, otherwise convert it into such a form and return
4182 the result. Associate any conversion statements with STMT_INFO's
4183 pattern. */
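/* Illustrative note: a conversion is only needed when the mask's natural
   vector type has a different number of elements from VECTYPE, e.g. a
   mask computed from int comparisons being used to mask a vector of
   doubles.  */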
4185 static tree
4186 vect_convert_mask_for_vectype (tree mask, tree vectype,
4187 stmt_vec_info stmt_info, vec_info *vinfo)
4189 tree mask_type = search_type_for_mask (mask, vinfo);
4190 if (mask_type)
4192 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4193 if (mask_vectype
4194 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4195 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4196 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4198 return mask;
4201 /* Return the equivalent of:
4203 fold_convert (TYPE, VALUE)
4205 with the expectation that the operation will be vectorized.
4206 If new statements are needed, add them as pattern statements
4207 to STMT_INFO. */
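/* Minimal sketch (hypothetical names): for a 32-bit VALUE and a 64-bit
   TYPE this appends a pattern statement of the form

     new_value = (TYPE) value;

   and gives it the vector type corresponding to TYPE.  */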
4209 static tree
4210 vect_add_conversion_to_pattern (tree type, tree value,
4211 stmt_vec_info stmt_info,
4212 vec_info *vinfo)
4214 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4215 return value;
4217 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4218 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4219 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4220 set_vinfo_for_stmt (conversion, new_stmt_info);
4221 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4222 append_pattern_def_seq (stmt_info, conversion);
4223 return new_value;
4226 /* Try to convert STMT into a call to a gather load or scatter store
4227 internal function. Return the final statement on success and set
4228 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4230 This function only handles gathers and scatters that were recognized
4231 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
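/* For example (illustrative only), a gather load such as

     for (i = 0; i < n; i++)
       sum += a[idx[i]];

   has its load replaced by an internal-function call of the form

     _1 = .GATHER_LOAD (base, offset, scale);

   (or .MASK_GATHER_LOAD with an additional mask operand when the access
   is conditional), with the data reference transferred to the new call.  */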
4233 static gimple *
4234 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4235 tree *type_in, tree *type_out)
4237 /* Currently we only support this for loop vectorization. */
4238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4239 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4240 if (!loop_vinfo)
4241 return NULL;
4243 /* Make sure that we're looking at a gather load or scatter store. */
4244 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4245 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4246 return NULL;
4248 /* Get the boolean that controls whether the load or store happens.
4249 This is null if the operation is unconditional. */
4250 tree mask = vect_get_load_store_mask (stmt);
4252 /* Make sure that the target supports an appropriate internal
4253 function for the gather/scatter operation. */
4254 gather_scatter_info gs_info;
4255 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4256 || gs_info.decl)
4257 return NULL;
4259 /* Convert the mask to the right form. */
4260 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4261 if (mask)
4262 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4263 loop_vinfo);
4265 /* Get the invariant base and non-invariant offset, converting the
4266 latter to the same width as the vector elements. */
4267 tree base = gs_info.base;
4268 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4269 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4270 last_stmt_info, loop_vinfo);
4272 /* Build the new pattern statement. */
4273 tree scale = size_int (gs_info.scale);
4274 gcall *pattern_stmt;
4275 if (DR_IS_READ (dr))
4277 if (mask != NULL)
4278 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4279 offset, scale, mask);
4280 else
4281 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4282 offset, scale);
4283 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4284 gimple_call_set_lhs (pattern_stmt, load_lhs);
4286 else
4288 tree rhs = vect_get_store_rhs (stmt);
4289 if (mask != NULL)
4290 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4291 base, offset, scale, rhs,
4292 mask);
4293 else
4294 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4295 base, offset, scale, rhs);
4297 gimple_call_set_nothrow (pattern_stmt, true);
4299 /* Copy across relevant vectorization info and associate DR with the
4300 new pattern statement instead of the original statement. */
4301 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4302 loop_vinfo);
4303 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4304 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4305 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4306 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4307 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4308 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4309 DR_STMT (dr) = pattern_stmt;
4311 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4312 *type_out = vectype;
4313 *type_in = vectype;
4315 if (dump_enabled_p ())
4316 dump_printf_loc (MSG_NOTE, vect_location,
4317 "gather/scatter pattern detected:\n");
4319 return pattern_stmt;
4322 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4324 static gimple *
4325 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4326 tree *type_out)
4328 gimple *last_stmt = stmts->pop ();
4329 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4330 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4331 last_stmt_info,
4332 type_in, type_out);
4333 if (pattern_stmt)
4334 stmts->safe_push (last_stmt);
4335 return pattern_stmt;
4338 /* Mark statements that are involved in a pattern. */
4340 static inline void
4341 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4342 tree pattern_vectype)
4344 stmt_vec_info pattern_stmt_info, def_stmt_info;
4345 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4346 vec_info *vinfo = orig_stmt_info->vinfo;
4347 gimple *def_stmt;
4349 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4350 if (pattern_stmt_info == NULL)
4352 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4353 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4355 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4357 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4358 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4359 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4360 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4361 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4362 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4363 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4364 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4365 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4367 gimple_stmt_iterator si;
4368 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4369 !gsi_end_p (si); gsi_next (&si))
4371 def_stmt = gsi_stmt (si);
4372 def_stmt_info = vinfo_for_stmt (def_stmt);
4373 if (def_stmt_info == NULL)
4375 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4376 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4378 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4379 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4380 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4381 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4382 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4387 /* Function vect_pattern_recog_1
4389 Input:
4390 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4391 computation pattern.
4392 STMT: A stmt from which the pattern search should start.
4394 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4395 expression that computes the same functionality and can be used to
4396 replace the sequence of stmts that are involved in the pattern.
4398 Output:
4399 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4400 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4401 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4402 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4403 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4404 to the available target pattern.
4406 This function also does some bookkeeping, as explained in the documentation
4407 for vect_pattern_recog. */
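/* Illustrative note: if the recognition function returns a scalar
   TYPE_IN (say 'short int' for a hypothetical widening pattern), we look
   up its vector type here and require an optab handler for the pattern
   statement's code in that mode before accepting the pattern.  */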
4409 static bool
4410 vect_pattern_recog_1 (vect_recog_func *recog_func,
4411 gimple_stmt_iterator si,
4412 vec<gimple *> *stmts_to_replace)
4414 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4415 stmt_vec_info stmt_info;
4416 loop_vec_info loop_vinfo;
4417 tree pattern_vectype;
4418 tree type_in, type_out;
4419 enum tree_code code;
4420 int i;
4421 gimple *next;
4423 stmts_to_replace->truncate (0);
4424 stmts_to_replace->quick_push (stmt);
4425 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4426 if (!pattern_stmt)
4427 return false;
4429 stmt = stmts_to_replace->last ();
4430 stmt_info = vinfo_for_stmt (stmt);
4431 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4433 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4434 || VECTOR_TYPE_P (type_in))
4436 /* No need to check target support (already checked by the pattern
4437 recognition function). */
4438 pattern_vectype = type_out ? type_out : type_in;
4440 else
4442 machine_mode vec_mode;
4443 enum insn_code icode;
4444 optab optab;
4446 /* Check target support */
4447 type_in = get_vectype_for_scalar_type (type_in);
4448 if (!type_in)
4449 return false;
4450 if (type_out)
4451 type_out = get_vectype_for_scalar_type (type_out);
4452 else
4453 type_out = type_in;
4454 if (!type_out)
4455 return false;
4456 pattern_vectype = type_out;
4458 if (is_gimple_assign (pattern_stmt))
4459 code = gimple_assign_rhs_code (pattern_stmt);
4460 else
4462 gcc_assert (is_gimple_call (pattern_stmt));
4463 code = CALL_EXPR;
4466 optab = optab_for_tree_code (code, type_in, optab_default);
4467 vec_mode = TYPE_MODE (type_in);
4468 if (!optab
4469 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4470 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4471 return false;
4474 /* Found a vectorizable pattern. */
4475 if (dump_enabled_p ())
4477 dump_printf_loc (MSG_NOTE, vect_location,
4478 "%s pattern recognized: ", recog_func->name);
4479 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4482 /* Mark the stmts that are involved in the pattern. */
4483 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4485 /* Patterns cannot be vectorized using SLP, because they change the order of
4486 computation. */
4487 if (loop_vinfo)
4488 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4489 if (next == stmt)
4490 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
4492 /* It is possible that additional pattern stmts are created and inserted in
4493 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4494 relevant statements. */
4495 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4496 && (unsigned) i < (stmts_to_replace->length () - 1);
4497 i++)
4499 stmt_info = vinfo_for_stmt (stmt);
4500 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4501 if (dump_enabled_p ())
4503 dump_printf_loc (MSG_NOTE, vect_location,
4504 "additional pattern stmt: ");
4505 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4508 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4511 return true;
4515 /* Function vect_pattern_recog
4517 Input:
4518 VINFO - the vec_info describing the loop or basic-block region in which
4519 we want to look for computation idioms.
4521 Output - for each computation idiom that is detected we create a new stmt
4522 that provides the same functionality and that can be vectorized. We
4523 also record some information in the stmt_vec_info of the relevant
4524 stmts, as explained below:
4526 At the entry to this function we have the following stmts, with the
4527 following initial value in the STMT_VINFO fields:
4529 stmt in_pattern_p related_stmt vec_stmt
4530 S1: a_i = .... - - -
4531 S2: a_2 = ..use(a_i).. - - -
4532 S3: a_1 = ..use(a_2).. - - -
4533 S4: a_0 = ..use(a_1).. - - -
4534 S5: ... = ..use(a_0).. - - -
4536 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4537 represented by a single stmt. We then:
4538 - create a new stmt S6 equivalent to the pattern (the stmt is not
4539 inserted into the code)
4540 - fill in the STMT_VINFO fields as follows:
4542 in_pattern_p related_stmt vec_stmt
4543 S1: a_i = .... - - -
4544 S2: a_2 = ..use(a_i).. - - -
4545 S3: a_1 = ..use(a_2).. - - -
4546 S4: a_0 = ..use(a_1).. true S6 -
4547 '---> S6: a_new = .... - S4 -
4548 S5: ... = ..use(a_0).. - - -
4550 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4551 to each other through the RELATED_STMT field).
4553 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4554 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4555 remain irrelevant unless used by stmts other than S4.
4557 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4558 (because they are marked as irrelevant). It will vectorize S6, and record
4559 a pointer to the new vector stmt VS6 from S6 (as usual).
4560 S4 will be skipped, and S5 will be vectorized as usual:
4562 in_pattern_p related_stmt vec_stmt
4563 S1: a_i = .... - - -
4564 S2: a_2 = ..use(a_i).. - - -
4565 S3: a_1 = ..use(a_2).. - - -
4566 > VS6: va_new = .... - - -
4567 S4: a_0 = ..use(a_1).. true S6 VS6
4568 '---> S6: a_new = .... - S4 VS6
4569 > VS5: ... = ..vuse(va_new).. - - -
4570 S5: ... = ..use(a_0).. - - -
4572 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4573 elsewhere), and we'll end up with:
4575 VS6: va_new = ....
4576 VS5: ... = ..vuse(va_new)..
4578 In case of more than one pattern statements, e.g., widen-mult with
4579 intermediate type:
4581 S1 a_t = ;
4582 S2 a_T = (TYPE) a_t;
4583 '--> S3: a_it = (interm_type) a_t;
4584 S4 prod_T = a_T * CONST;
4585 '--> S5: prod_T' = a_it w* CONST;
4587 there may be other users of a_T outside the pattern. In that case S2 will
4588 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4589 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4590 be recorded in S3. */
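/* As a source-level illustration of the widen-mult case (hypothetical
   names), code such as

     char a_t = ...;
     int  prod_T = a_t * CONST;

   can be recognized with 'short' as the intermediate type, giving the
   S3/S5 pattern statements shown above.  */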
4592 void
4593 vect_pattern_recog (vec_info *vinfo)
4595 struct loop *loop;
4596 basic_block *bbs;
4597 unsigned int nbbs;
4598 gimple_stmt_iterator si;
4599 unsigned int i, j;
4600 auto_vec<gimple *, 1> stmts_to_replace;
4601 gimple *stmt;
4603 if (dump_enabled_p ())
4604 dump_printf_loc (MSG_NOTE, vect_location,
4605 "=== vect_pattern_recog ===\n");
4607 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4609 loop = LOOP_VINFO_LOOP (loop_vinfo);
4610 bbs = LOOP_VINFO_BBS (loop_vinfo);
4611 nbbs = loop->num_nodes;
4613 /* Scan through the loop stmts, applying the pattern recognition
4614 functions starting at each stmt visited: */
4615 for (i = 0; i < nbbs; i++)
4617 basic_block bb = bbs[i];
4618 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4620 /* Scan over all generic vect_recog_xxx_pattern functions. */
4621 for (j = 0; j < NUM_PATTERNS; j++)
4622 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4623 &stmts_to_replace))
4624 break;
4628 else
4630 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4631 for (si = bb_vinfo->region_begin;
4632 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4634 if ((stmt = gsi_stmt (si))
4635 && vinfo_for_stmt (stmt)
4636 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4637 continue;
4639 /* Scan over all generic vect_recog_xxx_pattern functions. */
4640 for (j = 0; j < NUM_PATTERNS; j++)
4641 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4642 &stmts_to_replace))
4643 break;