Add support for SVE gather loads
[official-gcc.git] / gcc / tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
45 /* Pattern recognition functions */
46 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
47 tree *);
48 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
49 tree *);
50 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51 tree *);
52 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
53 tree *);
54 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
55 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
58 tree *, tree *);
59 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
61 tree *, tree *);
62 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
63 tree *, tree *);
65 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
66 tree *, tree *);
68 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69 tree *, tree *);
70 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
71 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
72 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
73 tree *);
75 struct vect_recog_func
77 vect_recog_func_ptr fn;
78 const char *name;
81 /* Note that ordering matters - the first pattern matching on a stmt
82 is taken, which means usually the more complex one needs to precede
83 the less complex ones (widen_sum only after dot_prod or sad for example). */
84 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
85 { vect_recog_widen_mult_pattern, "widen_mult" },
86 { vect_recog_dot_prod_pattern, "dot_prod" },
87 { vect_recog_sad_pattern, "sad" },
88 { vect_recog_widen_sum_pattern, "widen_sum" },
89 { vect_recog_pow_pattern, "pow" },
90 { vect_recog_widen_shift_pattern, "widen_shift" },
91 { vect_recog_over_widening_pattern, "over_widening" },
92 { vect_recog_rotate_pattern, "rotate" },
93 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
94 { vect_recog_divmod_pattern, "divmod" },
95 { vect_recog_mult_pattern, "mult" },
96 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
97 { vect_recog_bool_pattern, "bool" },
98 /* This must come before mask conversion, and includes the parts
99 of mask conversion that are needed for gather and scatter
100 internal functions. */
101 { vect_recog_gather_scatter_pattern, "gather_scatter" },
102 { vect_recog_mask_conversion_pattern, "mask_conversion" }
105 static inline void
106 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
108 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
109 stmt);
112 static inline void
113 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
115 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
116 append_pattern_def_seq (stmt_info, stmt);
119 /* Check whether STMT2 is in the same loop or basic block as STMT1.
120 Which of the two applies depends on whether we're currently doing
121 loop-based or basic-block-based vectorization, as determined by
122 the vinfo_for_stmt for STMT1 (which must be defined).
124 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
125 to be defined as well. */
127 static bool
128 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
130 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
131 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
134 /* If the LHS of DEF_STMT has a single use, and that statement is
135 in the same loop or basic block, return it. */
137 static gimple *
138 vect_single_imm_use (gimple *def_stmt)
140 tree lhs = gimple_assign_lhs (def_stmt);
141 use_operand_p use_p;
142 gimple *use_stmt;
144 if (!single_imm_use (lhs, &use_p, &use_stmt))
145 return NULL;
147 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
148 return NULL;
150 return use_stmt;
153 /* Check whether NAME, an ssa-name used in USE_STMT,
154 is a result of a type promotion, such that:
155 DEF_STMT: NAME = NOP (name0)
156 If CHECK_SIGN is TRUE, check that either both types are signed or both are
157 unsigned. */
159 static bool
160 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
161 tree *orig_type, gimple **def_stmt, bool *promotion)
163 gimple *dummy_gimple;
164 stmt_vec_info stmt_vinfo;
165 tree type = TREE_TYPE (name);
166 tree oprnd0;
167 enum vect_def_type dt;
169 stmt_vinfo = vinfo_for_stmt (use_stmt);
170 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
171 return false;
173 if (dt != vect_internal_def
174 && dt != vect_external_def && dt != vect_constant_def)
175 return false;
177 if (!*def_stmt)
178 return false;
180 if (dt == vect_internal_def)
182 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
183 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
184 return false;
187 if (!is_gimple_assign (*def_stmt))
188 return false;
190 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
191 return false;
193 oprnd0 = gimple_assign_rhs1 (*def_stmt);
195 *orig_type = TREE_TYPE (oprnd0);
196 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
197 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
198 return false;
200 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
201 *promotion = true;
202 else
203 *promotion = false;
205 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
206 return false;
208 return true;
211 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
212 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
214 static tree
215 vect_recog_temp_ssa_var (tree type, gimple *stmt)
217 return make_temp_ssa_name (type, stmt, "patt");
220 /* Function vect_recog_dot_prod_pattern
222 Try to find the following pattern:
224 type x_t, y_t;
225 TYPE1 prod;
226 TYPE2 sum = init;
227 loop:
228 sum_0 = phi <init, sum_1>
229 S1 x_t = ...
230 S2 y_t = ...
231 S3 x_T = (TYPE1) x_t;
232 S4 y_T = (TYPE1) y_t;
233 S5 prod = x_T * y_T;
234 [S6 prod = (TYPE2) prod; #optional]
235 S7 sum_1 = prod + sum_0;
237 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
238 same size as 'TYPE1' or bigger. This is a special case of a reduction
239 computation.
241 Input:
243 * STMTS: Contains a stmt from which the pattern search begins. In the
244 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
245 will be detected.
247 Output:
249 * TYPE_IN: The type of the input arguments to the pattern.
251 * TYPE_OUT: The type of the output of this pattern.
253 * Return value: A new stmt that will be used to replace the sequence of
254 stmts that constitute the pattern. In this case it will be:
255 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
257 Note: The dot-prod idiom is a widening reduction pattern that is
258 vectorized without preserving all the intermediate results. It
259 produces only N/2 (widened) results (by summing up pairs of
260 intermediate results) rather than all N results. Therefore, we
261 cannot allow this pattern when we want to get all the results and in
262 the correct order (as is the case when this computation is in an
263 inner-loop nested in an outer-loop that is being vectorized). */
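/* Illustrative example (editorial addition, not part of the original file):
   a scalar source loop of the kind this pattern is meant to recognize,
   assuming 8-bit inputs, a 16-bit product and a 32-bit accumulator.  The
   array and function names below are made up.

     signed char x[64], y[64];

     int
     dot_prod (void)
     {
       int sum = 0;
       for (int i = 0; i < 64; i++)
         {
           short prod = x[i] * y[i];   # S3-S5: widen both inputs, multiply
           sum += prod;                # [S6] S7: widen again and accumulate
         }
       return sum;
     }

   On targets with a dot-product instruction the multiply/accumulate body
   would be replaced by DOT_PROD_EXPR <x_t, y_t, sum_0>.  */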
265 static gimple *
266 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
267 tree *type_out)
269 gimple *stmt, *last_stmt = (*stmts)[0];
270 tree oprnd0, oprnd1;
271 tree oprnd00, oprnd01;
272 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
273 tree type, half_type;
274 gimple *pattern_stmt;
275 tree prod_type;
276 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
277 struct loop *loop;
278 tree var;
279 bool promotion;
281 if (!loop_info)
282 return NULL;
284 loop = LOOP_VINFO_LOOP (loop_info);
286 /* We don't allow changing the order of the computation in the inner-loop
287 when doing outer-loop vectorization. */
288 if (loop && nested_in_vect_loop_p (loop, last_stmt))
289 return NULL;
291 if (!is_gimple_assign (last_stmt))
292 return NULL;
294 type = gimple_expr_type (last_stmt);
296 /* Look for the following pattern
297 DX = (TYPE1) X;
298 DY = (TYPE1) Y;
299 DPROD = DX * DY;
300 DDPROD = (TYPE2) DPROD;
301 sum_1 = DDPROD + sum_0;
302 In which
303 - DX is double the size of X
304 - DY is double the size of Y
305 - DX, DY, DPROD all have the same type
306 - sum is the same size as DPROD or bigger
307 - sum has been recognized as a reduction variable.
309 This is equivalent to:
310 DPROD = X w* Y; #widen mult
311 sum_1 = DPROD w+ sum_0; #widen summation
313 DPROD = X w* Y; #widen mult
314 sum_1 = DPROD + sum_0; #summation
317 /* Starting from LAST_STMT, follow the defs of its uses in search
318 of the above pattern. */
320 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
321 return NULL;
323 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
325 /* Has been detected as widening-summation? */
327 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
328 type = gimple_expr_type (stmt);
329 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
330 return NULL;
331 oprnd0 = gimple_assign_rhs1 (stmt);
332 oprnd1 = gimple_assign_rhs2 (stmt);
333 half_type = TREE_TYPE (oprnd0);
335 else
337 gimple *def_stmt;
339 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
340 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
341 return NULL;
342 oprnd0 = gimple_assign_rhs1 (last_stmt);
343 oprnd1 = gimple_assign_rhs2 (last_stmt);
344 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
345 || !types_compatible_p (TREE_TYPE (oprnd1), type))
346 return NULL;
347 stmt = last_stmt;
349 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
350 &promotion)
351 && promotion)
353 stmt = def_stmt;
354 oprnd0 = gimple_assign_rhs1 (stmt);
356 else
357 half_type = type;
360 /* So far so good. Since last_stmt was detected as a (summation) reduction,
361 we know that oprnd1 is the reduction variable (defined by a loop-header
362 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
363 Left to check that oprnd0 is defined by a (widen_)mult_expr */
364 if (TREE_CODE (oprnd0) != SSA_NAME)
365 return NULL;
367 prod_type = half_type;
368 stmt = SSA_NAME_DEF_STMT (oprnd0);
370 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
371 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
372 return NULL;
374 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
375 inside the loop (in case we are analyzing an outer-loop). */
376 if (!is_gimple_assign (stmt))
377 return NULL;
378 stmt_vinfo = vinfo_for_stmt (stmt);
379 gcc_assert (stmt_vinfo);
380 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
381 return NULL;
382 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
383 return NULL;
384 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
386 /* Has been detected as a widening multiplication? */
388 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
389 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
390 return NULL;
391 stmt_vinfo = vinfo_for_stmt (stmt);
392 gcc_assert (stmt_vinfo);
393 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
394 oprnd00 = gimple_assign_rhs1 (stmt);
395 oprnd01 = gimple_assign_rhs2 (stmt);
396 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
397 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
399 else
401 tree half_type0, half_type1;
402 gimple *def_stmt;
403 tree oprnd0, oprnd1;
405 oprnd0 = gimple_assign_rhs1 (stmt);
406 oprnd1 = gimple_assign_rhs2 (stmt);
407 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
408 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
409 return NULL;
410 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
411 &promotion)
412 || !promotion)
413 return NULL;
414 oprnd00 = gimple_assign_rhs1 (def_stmt);
415 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
416 &promotion)
417 || !promotion)
418 return NULL;
419 oprnd01 = gimple_assign_rhs1 (def_stmt);
420 if (!types_compatible_p (half_type0, half_type1))
421 return NULL;
422 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
423 return NULL;
426 half_type = TREE_TYPE (oprnd00);
427 *type_in = half_type;
428 *type_out = type;
430 /* Pattern detected. Create a stmt to be used to replace the pattern: */
431 var = vect_recog_temp_ssa_var (type, NULL);
432 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
433 oprnd00, oprnd01, oprnd1);
435 if (dump_enabled_p ())
437 dump_printf_loc (MSG_NOTE, vect_location,
438 "vect_recog_dot_prod_pattern: detected: ");
439 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
442 return pattern_stmt;
446 /* Function vect_recog_sad_pattern
448 Try to find the following Sum of Absolute Difference (SAD) pattern:
450 type x_t, y_t;
451 signed TYPE1 diff, abs_diff;
452 TYPE2 sum = init;
453 loop:
454 sum_0 = phi <init, sum_1>
455 S1 x_t = ...
456 S2 y_t = ...
457 S3 x_T = (TYPE1) x_t;
458 S4 y_T = (TYPE1) y_t;
459 S5 diff = x_T - y_T;
460 S6 abs_diff = ABS_EXPR <diff>;
461 [S7 abs_diff = (TYPE2) abs_diff; #optional]
462 S8 sum_1 = abs_diff + sum_0;
464 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
465 same size as 'TYPE1' or bigger. This is a special case of a reduction
466 computation.
468 Input:
470 * STMTS: Contains a stmt from which the pattern search begins. In the
471 example, when this function is called with S8, the pattern
472 {S3,S4,S5,S6,S7,S8} will be detected.
474 Output:
476 * TYPE_IN: The type of the input arguments to the pattern.
478 * TYPE_OUT: The type of the output of this pattern.
480 * Return value: A new stmt that will be used to replace the sequence of
481 stmts that constitute the pattern. In this case it will be:
482 SAD_EXPR <x_t, y_t, sum_0>
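/* Illustrative example (editorial addition): a scalar loop matching the
   SAD idiom above, assuming unsigned char inputs and an int accumulator;
   the names are made up and abs () comes from <stdlib.h>.

     unsigned char x[64], y[64];

     int
     sad (void)
     {
       int sum = 0;
       for (int i = 0; i < 64; i++)
         sum += abs (x[i] - y[i]);   # S3-S8: widen, subtract, ABS_EXPR, accumulate
       return sum;
     }

   On targets with a sum-of-absolute-differences instruction the loop body
   would be replaced by SAD_EXPR <x_t, y_t, sum_0>.  */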
485 static gimple *
486 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
487 tree *type_out)
489 gimple *last_stmt = (*stmts)[0];
490 tree sad_oprnd0, sad_oprnd1;
491 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
492 tree half_type;
493 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
494 struct loop *loop;
495 bool promotion;
497 if (!loop_info)
498 return NULL;
500 loop = LOOP_VINFO_LOOP (loop_info);
502 /* We don't allow changing the order of the computation in the inner-loop
503 when doing outer-loop vectorization. */
504 if (loop && nested_in_vect_loop_p (loop, last_stmt))
505 return NULL;
507 if (!is_gimple_assign (last_stmt))
508 return NULL;
510 tree sum_type = gimple_expr_type (last_stmt);
512 /* Look for the following pattern
513 DX = (TYPE1) X;
514 DY = (TYPE1) Y;
515 DDIFF = DX - DY;
516 DAD = ABS_EXPR <DDIFF>;
517 DAD = (TYPE2) DAD; #optional conversion
518 sum_1 = DAD + sum_0;
519 In which
520 - DX is at least double the size of X
521 - DY is at least double the size of Y
522 - DX, DY, DDIFF, DAD all have the same type
523 - sum is the same size as DAD or bigger
524 - sum has been recognized as a reduction variable.
526 This is equivalent to:
527 DDIFF = X w- Y; #widen sub
528 DAD = ABS_EXPR <DDIFF>;
529 sum_1 = DAD w+ sum_0; #widen summation
531 DDIFF = X w- Y; #widen sub
532 DAD = ABS_EXPR <DDIFF>;
533 sum_1 = DAD + sum_0; #summation
536 /* Starting from LAST_STMT, follow the defs of its uses in search
537 of the above pattern. */
539 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
540 return NULL;
542 tree plus_oprnd0, plus_oprnd1;
544 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
546 /* Has been detected as widening-summation? */
548 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
549 sum_type = gimple_expr_type (stmt);
550 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
551 return NULL;
552 plus_oprnd0 = gimple_assign_rhs1 (stmt);
553 plus_oprnd1 = gimple_assign_rhs2 (stmt);
554 half_type = TREE_TYPE (plus_oprnd0);
556 else
558 gimple *def_stmt;
560 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
561 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
562 return NULL;
563 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
564 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
565 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
566 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
567 return NULL;
569 /* The type conversion could be promotion, demotion,
570 or just signed -> unsigned. */
571 if (type_conversion_p (plus_oprnd0, last_stmt, false,
572 &half_type, &def_stmt, &promotion))
573 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
574 else
575 half_type = sum_type;
578 /* So far so good. Since last_stmt was detected as a (summation) reduction,
579 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
580 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
581 Then check that plus_oprnd0 is defined by an abs_expr. */
583 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
584 return NULL;
586 tree abs_type = half_type;
587 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
589 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
590 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
591 return NULL;
593 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
594 inside the loop (in case we are analyzing an outer-loop). */
595 if (!is_gimple_assign (abs_stmt))
596 return NULL;
598 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
599 gcc_assert (abs_stmt_vinfo);
600 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
601 return NULL;
602 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
603 return NULL;
605 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
606 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
607 return NULL;
608 if (TYPE_UNSIGNED (abs_type))
609 return NULL;
611 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
613 if (TREE_CODE (abs_oprnd) != SSA_NAME)
614 return NULL;
616 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
618 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
619 if (!gimple_bb (diff_stmt)
620 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
621 return NULL;
623 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
624 inside the loop (in case we are analyzing an outer-loop). */
625 if (!is_gimple_assign (diff_stmt))
626 return NULL;
628 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
629 gcc_assert (diff_stmt_vinfo);
630 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
631 return NULL;
632 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
633 return NULL;
635 tree half_type0, half_type1;
636 gimple *def_stmt;
638 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
639 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
641 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
642 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
643 return NULL;
644 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
645 &half_type0, &def_stmt, &promotion)
646 || !promotion)
647 return NULL;
648 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
650 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
651 &half_type1, &def_stmt, &promotion)
652 || !promotion)
653 return NULL;
654 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
656 if (!types_compatible_p (half_type0, half_type1))
657 return NULL;
658 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
659 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
660 return NULL;
662 *type_in = TREE_TYPE (sad_oprnd0);
663 *type_out = sum_type;
665 /* Pattern detected. Create a stmt to be used to replace the pattern: */
666 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
667 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
668 sad_oprnd1, plus_oprnd1);
670 if (dump_enabled_p ())
672 dump_printf_loc (MSG_NOTE, vect_location,
673 "vect_recog_sad_pattern: detected: ");
674 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
677 return pattern_stmt;
681 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
682 and LSHIFT_EXPR.
684 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
685 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
687 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
688 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
689 that satisfies the above restrictions, we can perform a widening operation
690 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
691 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
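/* Illustrative example (editorial addition): the intermediate-type case
   described above, with made-up names.  Multiplying a char by 300 in an
   int context: 300 does not fit signed char, but TYPE (int) is four times
   wider than HALF_TYPE (char) and 300 fits the intermediate short type,
   so this helper should allow a_it = (short) a[i] to be created and a
   short -> int widening multiply to be used.

     signed char a[64];
     int out[64];

     void
     mult_by_const (void)
     {
       for (int i = 0; i < 64; i++)
         out[i] = a[i] * 300;
     }
 */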
693 static bool
694 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
695 tree const_oprnd, tree *oprnd,
696 gimple **wstmt, tree type,
697 tree *half_type, gimple *def_stmt)
699 tree new_type, new_oprnd;
701 if (code != MULT_EXPR && code != LSHIFT_EXPR)
702 return false;
704 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
705 || (code == LSHIFT_EXPR
706 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
707 != 1))
708 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
710 /* CONST_OPRND is a constant of HALF_TYPE. */
711 *oprnd = gimple_assign_rhs1 (def_stmt);
712 return true;
715 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
716 return false;
718 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
719 return false;
721 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
722 a type 2 times bigger than HALF_TYPE. */
723 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
724 TYPE_UNSIGNED (type));
725 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
726 || (code == LSHIFT_EXPR
727 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
728 return false;
730 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
731 *oprnd = gimple_assign_rhs1 (def_stmt);
732 new_oprnd = make_ssa_name (new_type);
733 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
734 *oprnd = new_oprnd;
736 *half_type = new_type;
737 return true;
741 /* Function vect_recog_widen_mult_pattern
743 Try to find the following pattern:
745 type1 a_t;
746 type2 b_t;
747 TYPE a_T, b_T, prod_T;
749 S1 a_t = ;
750 S2 b_t = ;
751 S3 a_T = (TYPE) a_t;
752 S4 b_T = (TYPE) b_t;
753 S5 prod_T = a_T * b_T;
755 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
757 Also detect unsigned cases:
759 unsigned type1 a_t;
760 unsigned type2 b_t;
761 unsigned TYPE u_prod_T;
762 TYPE a_T, b_T, prod_T;
764 S1 a_t = ;
765 S2 b_t = ;
766 S3 a_T = (TYPE) a_t;
767 S4 b_T = (TYPE) b_t;
768 S5 prod_T = a_T * b_T;
769 S6 u_prod_T = (unsigned TYPE) prod_T;
771 and multiplication by constants:
773 type a_t;
774 TYPE a_T, prod_T;
776 S1 a_t = ;
777 S3 a_T = (TYPE) a_t;
778 S5 prod_T = a_T * CONST;
780 A special case of multiplication by constants is when 'TYPE' is 4 times
781 bigger than 'type', but CONST fits an intermediate type 2 times smaller
782 than 'TYPE'. In that case we create an additional pattern stmt for S3
783 to create a variable of the intermediate type, and perform widen-mult
784 on the intermediate type as well:
786 type a_t;
787 interm_type a_it;
788 TYPE a_T, prod_T, prod_T';
790 S1 a_t = ;
791 S3 a_T = (TYPE) a_t;
792 '--> a_it = (interm_type) a_t;
793 S5 prod_T = a_T * CONST;
794 '--> prod_T' = a_it w* CONST;
796 Input/Output:
798 * STMTS: Contains a stmt from which the pattern search begins. In the
799 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
800 is detected. In case of unsigned widen-mult, the original stmt (S5) is
801 replaced with S6 in STMTS. In case of multiplication by a constant
802 of an intermediate type (the last case above), STMTS also contains S3
803 (inserted before S5).
805 Output:
807 * TYPE_IN: The type of the input arguments to the pattern.
809 * TYPE_OUT: The type of the output of this pattern.
811 * Return value: A new stmt that will be used to replace the sequence of
812 stmts that constitute the pattern. In this case it will be:
813 WIDEN_MULT <a_t, b_t>
814 If the result of WIDEN_MULT needs to be converted to a larger type, the
815 returned stmt will be this type conversion stmt.
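/* Illustrative example (editorial addition): a scalar loop matching the
   basic widen-mult idiom, assuming 16-bit inputs and a 32-bit result;
   the names are made up.

     short a[64], b[64];
     int c[64];

     void
     widen_mult (void)
     {
       for (int i = 0; i < 64; i++)
         c[i] = a[i] * b[i];   # S3-S5: both operands widened to int, then
                               # multiplied; becomes WIDEN_MULT <a_t, b_t>
     }
 */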
818 static gimple *
819 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
820 tree *type_in, tree *type_out)
822 gimple *last_stmt = stmts->pop ();
823 gimple *def_stmt0, *def_stmt1;
824 tree oprnd0, oprnd1;
825 tree type, half_type0, half_type1;
826 gimple *new_stmt = NULL, *pattern_stmt = NULL;
827 tree vectype, vecitype;
828 tree var;
829 enum tree_code dummy_code;
830 int dummy_int;
831 vec<tree> dummy_vec;
832 bool op1_ok;
833 bool promotion;
835 if (!is_gimple_assign (last_stmt))
836 return NULL;
838 type = gimple_expr_type (last_stmt);
840 /* Starting from LAST_STMT, follow the defs of its uses in search
841 of the above pattern. */
843 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
844 return NULL;
846 oprnd0 = gimple_assign_rhs1 (last_stmt);
847 oprnd1 = gimple_assign_rhs2 (last_stmt);
848 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
849 || !types_compatible_p (TREE_TYPE (oprnd1), type))
850 return NULL;
852 /* Check argument 0. */
853 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
854 &promotion)
855 || !promotion)
856 return NULL;
857 /* Check argument 1. */
858 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
859 &def_stmt1, &promotion);
861 if (op1_ok && promotion)
863 oprnd0 = gimple_assign_rhs1 (def_stmt0);
864 oprnd1 = gimple_assign_rhs1 (def_stmt1);
866 else
868 if (TREE_CODE (oprnd1) == INTEGER_CST
869 && TREE_CODE (half_type0) == INTEGER_TYPE
870 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
871 &oprnd0, &new_stmt, type,
872 &half_type0, def_stmt0))
874 half_type1 = half_type0;
875 oprnd1 = fold_convert (half_type1, oprnd1);
877 else
878 return NULL;
881 /* If the two arguments have different sizes, convert the one with
882 the smaller type into the larger type. */
883 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
885 /* If we already used up the single-stmt slot give up. */
886 if (new_stmt)
887 return NULL;
889 tree* oprnd = NULL;
890 gimple *def_stmt = NULL;
892 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
894 def_stmt = def_stmt0;
895 half_type0 = half_type1;
896 oprnd = &oprnd0;
898 else
900 def_stmt = def_stmt1;
901 half_type1 = half_type0;
902 oprnd = &oprnd1;
905 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
906 tree new_oprnd = make_ssa_name (half_type0);
907 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
908 *oprnd = new_oprnd;
911 /* Handle unsigned case. Look for
912 S6 u_prod_T = (unsigned TYPE) prod_T;
913 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
914 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
916 gimple *use_stmt;
917 tree use_lhs;
918 tree use_type;
920 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
921 return NULL;
923 use_stmt = vect_single_imm_use (last_stmt);
924 if (!use_stmt || !is_gimple_assign (use_stmt)
925 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
926 return NULL;
928 use_lhs = gimple_assign_lhs (use_stmt);
929 use_type = TREE_TYPE (use_lhs);
930 if (!INTEGRAL_TYPE_P (use_type)
931 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
932 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
933 return NULL;
935 type = use_type;
936 last_stmt = use_stmt;
939 if (!types_compatible_p (half_type0, half_type1))
940 return NULL;
942 /* If TYPE is more than twice as wide as HALF_TYPE, we use WIDEN_MULT
943 to get an intermediate result of type ITYPE. In this case we need
944 to build a statement to convert this intermediate result to type TYPE. */
945 tree itype = type;
946 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
947 itype = build_nonstandard_integer_type
948 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
949 TYPE_UNSIGNED (type));
951 /* Pattern detected. */
952 if (dump_enabled_p ())
953 dump_printf_loc (MSG_NOTE, vect_location,
954 "vect_recog_widen_mult_pattern: detected:\n");
956 /* Check target support */
957 vectype = get_vectype_for_scalar_type (half_type0);
958 vecitype = get_vectype_for_scalar_type (itype);
959 if (!vectype
960 || !vecitype
961 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
962 vecitype, vectype,
963 &dummy_code, &dummy_code,
964 &dummy_int, &dummy_vec))
965 return NULL;
967 *type_in = vectype;
968 *type_out = get_vectype_for_scalar_type (type);
970 /* Pattern supported. Create a stmt to be used to replace the pattern: */
971 var = vect_recog_temp_ssa_var (itype, NULL);
972 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
974 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
975 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
977 /* If the original two operands have different sizes, we may need to convert
978 the smaller one into the larger type. If this is the case, at this point
979 the new stmt is already built. */
980 if (new_stmt)
982 append_pattern_def_seq (stmt_vinfo, new_stmt);
983 stmt_vec_info new_stmt_info
984 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
985 set_vinfo_for_stmt (new_stmt, new_stmt_info);
986 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
989 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
990 the result of the widen-mult operation into type TYPE. */
991 if (itype != type)
993 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
994 stmt_vec_info pattern_stmt_info
995 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
996 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
997 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
998 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
999 NOP_EXPR,
1000 gimple_assign_lhs (pattern_stmt));
1003 if (dump_enabled_p ())
1004 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1006 stmts->safe_push (last_stmt);
1007 return pattern_stmt;
1011 /* Function vect_recog_pow_pattern
1013 Try to find the following pattern:
1015 x = POW (y, N);
1017 with POW being one of pow, powf, powi, powif and N being
1018 either 2 or 0.5.
1020 Input:
1022 * LAST_STMT: A stmt from which the pattern search begins.
1024 Output:
1026 * TYPE_IN: The type of the input arguments to the pattern.
1028 * TYPE_OUT: The type of the output of this pattern.
1030 * Return value: A new stmt that will be used to replace the sequence of
1031 stmts that constitute the pattern. In this case it will be:
1032 x = x * x
1034 x = sqrt (x)
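/* Illustrative example (editorial addition): calls of the kind this
   pattern rewrites, assuming double operands; the names are made up and
   pow comes from <math.h>.

     #include <math.h>

     void
     pow_loops (double *a, double *b, int n)
     {
       for (int i = 0; i < n; i++)
         {
           a[i] = pow (a[i], 2.0);   # becomes a[i] * a[i]
           b[i] = pow (b[i], 0.5);   # becomes sqrt (b[i]) when the target
                                     # supports a vector IFN_SQRT
         }
     }
 */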
1037 static gimple *
1038 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1039 tree *type_out)
1041 gimple *last_stmt = (*stmts)[0];
1042 tree base, exp = NULL;
1043 gimple *stmt;
1044 tree var;
1046 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1047 return NULL;
1049 switch (gimple_call_combined_fn (last_stmt))
1051 CASE_CFN_POW:
1052 CASE_CFN_POWI:
1053 base = gimple_call_arg (last_stmt, 0);
1054 exp = gimple_call_arg (last_stmt, 1);
1055 if (TREE_CODE (exp) != REAL_CST
1056 && TREE_CODE (exp) != INTEGER_CST)
1057 return NULL;
1058 break;
1060 default:
1061 return NULL;
1064 /* We now have a pow or powi builtin function call with a constant
1065 exponent. */
1067 *type_out = NULL_TREE;
1069 /* Catch squaring. */
1070 if ((tree_fits_shwi_p (exp)
1071 && tree_to_shwi (exp) == 2)
1072 || (TREE_CODE (exp) == REAL_CST
1073 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1075 *type_in = TREE_TYPE (base);
1077 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1078 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1079 return stmt;
1082 /* Catch square root. */
1083 if (TREE_CODE (exp) == REAL_CST
1084 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1086 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1087 if (*type_in
1088 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1089 OPTIMIZE_FOR_SPEED))
1091 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1092 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1093 gimple_call_set_lhs (stmt, var);
1094 gimple_call_set_nothrow (stmt, true);
1095 return stmt;
1099 return NULL;
1103 /* Function vect_recog_widen_sum_pattern
1105 Try to find the following pattern:
1107 type x_t;
1108 TYPE x_T, sum = init;
1109 loop:
1110 sum_0 = phi <init, sum_1>
1111 S1 x_t = *p;
1112 S2 x_T = (TYPE) x_t;
1113 S3 sum_1 = x_T + sum_0;
1115 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1116 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1117 a special case of a reduction computation.
1119 Input:
1121 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1122 when this function is called with S3, the pattern {S2,S3} will be detected.
1124 Output:
1126 * TYPE_IN: The type of the input arguments to the pattern.
1128 * TYPE_OUT: The type of the output of this pattern.
1130 * Return value: A new stmt that will be used to replace the sequence of
1131 stmts that constitute the pattern. In this case it will be:
1132 WIDEN_SUM <x_t, sum_0>
1134 Note: The widening-sum idiom is a widening reduction pattern that is
1135 vectorized without preserving all the intermediate results. It
1136 produces only N/2 (widened) results (by summing up pairs of
1137 intermediate results) rather than all N results. Therefore, we
1138 cannot allow this pattern when we want to get all the results and in
1139 the correct order (as is the case when this computation is in an
1140 inner-loop nested in an outer-loop that is being vectorized). */
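/* Illustrative example (editorial addition): a scalar reduction matching
   the widening-sum idiom, assuming unsigned short elements summed into an
   unsigned int accumulator; the names are made up.

     unsigned short x[64];

     unsigned int
     widen_sum (void)
     {
       unsigned int sum = 0;
       for (int i = 0; i < 64; i++)
         sum += x[i];   # S2, S3: widen and accumulate; becomes
                        # WIDEN_SUM <x_t, sum_0>
       return sum;
     }
 */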
1142 static gimple *
1143 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1144 tree *type_out)
1146 gimple *stmt, *last_stmt = (*stmts)[0];
1147 tree oprnd0, oprnd1;
1148 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1149 tree type, half_type;
1150 gimple *pattern_stmt;
1151 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1152 struct loop *loop;
1153 tree var;
1154 bool promotion;
1156 if (!loop_info)
1157 return NULL;
1159 loop = LOOP_VINFO_LOOP (loop_info);
1161 /* We don't allow changing the order of the computation in the inner-loop
1162 when doing outer-loop vectorization. */
1163 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1164 return NULL;
1166 if (!is_gimple_assign (last_stmt))
1167 return NULL;
1169 type = gimple_expr_type (last_stmt);
1171 /* Look for the following pattern
1172 DX = (TYPE) X;
1173 sum_1 = DX + sum_0;
1174 In which DX is at least double the size of X, and sum_1 has been
1175 recognized as a reduction variable.
1178 /* Starting from LAST_STMT, follow the defs of its uses in search
1179 of the above pattern. */
1181 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1182 return NULL;
1184 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
1185 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
1186 return NULL;
1188 oprnd0 = gimple_assign_rhs1 (last_stmt);
1189 oprnd1 = gimple_assign_rhs2 (last_stmt);
1190 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1191 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1192 return NULL;
1194 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1195 we know that oprnd1 is the reduction variable (defined by a loop-header
1196 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1197 Left to check that oprnd0 is defined by a cast from type 'type' to type
1198 'TYPE'. */
1200 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1201 &promotion)
1202 || !promotion)
1203 return NULL;
1205 oprnd0 = gimple_assign_rhs1 (stmt);
1206 *type_in = half_type;
1207 *type_out = type;
1209 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1210 var = vect_recog_temp_ssa_var (type, NULL);
1211 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1213 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_recog_widen_sum_pattern: detected: ");
1217 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1220 return pattern_stmt;
1224 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1226 Input:
1227 STMT - a statement to check.
1228 DEF - we support operations with two operands, one of which is constant.
1229 The other operand can be defined by a demotion operation, or by a
1230 previous statement in a sequence of over-promoted operations. In the
1231 latter case DEF is used to replace that operand. (It is defined by a
1232 pattern statement we created for the previous statement in the
1233 sequence).
1235 Input/output:
1236 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1237 NULL, it's the type of DEF.
1238 STMTS - additional pattern statements. If a pattern statement (type
1239 conversion) is created in this function, its original statement is
1240 added to STMTS.
1242 Output:
1243 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1244 operands to use in the new pattern statement for STMT (will be created
1245 in vect_recog_over_widening_pattern ()).
1246 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1247 statements for STMT: the first one is a type promotion and the second
1248 one is the operation itself. We return the type promotion statement
1249 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1250 the second pattern statement. */
1252 static bool
1253 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1254 tree *op0, tree *op1, gimple **new_def_stmt,
1255 vec<gimple *> *stmts)
1257 enum tree_code code;
1258 tree const_oprnd, oprnd;
1259 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1260 gimple *def_stmt, *new_stmt;
1261 bool first = false;
1262 bool promotion;
1264 *op0 = NULL_TREE;
1265 *op1 = NULL_TREE;
1266 *new_def_stmt = NULL;
1268 if (!is_gimple_assign (stmt))
1269 return false;
1271 code = gimple_assign_rhs_code (stmt);
1272 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1273 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1274 return false;
1276 oprnd = gimple_assign_rhs1 (stmt);
1277 const_oprnd = gimple_assign_rhs2 (stmt);
1278 type = gimple_expr_type (stmt);
1280 if (TREE_CODE (oprnd) != SSA_NAME
1281 || TREE_CODE (const_oprnd) != INTEGER_CST)
1282 return false;
1284 /* If oprnd has other uses besides that in stmt we cannot mark it
1285 as being part of a pattern only. */
1286 if (!has_single_use (oprnd))
1287 return false;
1289 /* If we are in the middle of a sequence, we use DEF from a previous
1290 statement. Otherwise, OPRND has to be a result of type promotion. */
1291 if (*new_type)
1293 half_type = *new_type;
1294 oprnd = def;
1296 else
1298 first = true;
1299 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1300 &promotion)
1301 || !promotion
1302 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1303 return false;
1306 /* Can we perform the operation on a smaller type? */
1307 switch (code)
1309 case BIT_IOR_EXPR:
1310 case BIT_XOR_EXPR:
1311 case BIT_AND_EXPR:
1312 if (!int_fits_type_p (const_oprnd, half_type))
1314 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1315 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1316 return false;
1318 interm_type = build_nonstandard_integer_type (
1319 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1320 if (!int_fits_type_p (const_oprnd, interm_type))
1321 return false;
1324 break;
1326 case LSHIFT_EXPR:
1327 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1328 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1329 return false;
1331 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1332 (e.g., if the original value was char, the shift amount is at most 8
1333 if we want to use short). */
1334 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1335 return false;
1337 interm_type = build_nonstandard_integer_type (
1338 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1340 if (!vect_supportable_shift (code, interm_type))
1341 return false;
1343 break;
1345 case RSHIFT_EXPR:
1346 if (vect_supportable_shift (code, half_type))
1347 break;
1349 /* Try intermediate type - HALF_TYPE is not supported. */
1350 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1351 return false;
1353 interm_type = build_nonstandard_integer_type (
1354 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1356 if (!vect_supportable_shift (code, interm_type))
1357 return false;
1359 break;
1361 default:
1362 gcc_unreachable ();
1365 /* There are four possible cases:
1366 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1367 the first statement in the sequence)
1368 a. The original, HALF_TYPE, is not enough - we replace the promotion
1369 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1370 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1371 promotion.
1372 2. OPRND is defined by a pattern statement we created.
1373 a. Its type is not sufficient for the operation, we create a new stmt:
1374 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1375 this statement in NEW_DEF_STMT, and it is later put in
1376 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1377 b. OPRND is good to use in the new statement. */
1378 if (first)
1380 if (interm_type)
1382 /* Replace the original type conversion HALF_TYPE->TYPE with
1383 HALF_TYPE->INTERM_TYPE. */
1384 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1386 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1387 /* Check if the already created pattern stmt is what we need. */
1388 if (!is_gimple_assign (new_stmt)
1389 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1390 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1391 return false;
1393 stmts->safe_push (def_stmt);
1394 oprnd = gimple_assign_lhs (new_stmt);
1396 else
1398 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1399 oprnd = gimple_assign_rhs1 (def_stmt);
1400 new_oprnd = make_ssa_name (interm_type);
1401 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1402 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1403 stmts->safe_push (def_stmt);
1404 oprnd = new_oprnd;
1407 else
1409 /* Retrieve the operand before the type promotion. */
1410 oprnd = gimple_assign_rhs1 (def_stmt);
1413 else
1415 if (interm_type)
1417 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1418 new_oprnd = make_ssa_name (interm_type);
1419 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1420 oprnd = new_oprnd;
1421 *new_def_stmt = new_stmt;
1424 /* Otherwise, OPRND is already set. */
1427 if (interm_type)
1428 *new_type = interm_type;
1429 else
1430 *new_type = half_type;
1432 *op0 = oprnd;
1433 *op1 = fold_convert (*new_type, const_oprnd);
1435 return true;
1439 /* Try to find a statement or a sequence of statements that can be performed
1440 on a smaller type:
1442 type x_t;
1443 TYPE x_T, res0_T, res1_T;
1444 loop:
1445 S1 x_t = *p;
1446 S2 x_T = (TYPE) x_t;
1447 S3 res0_T = op (x_T, C0);
1448 S4 res1_T = op (res0_T, C1);
1449 S5 ... = () res1_T; - type demotion
1451 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1452 constants.
1453 Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1454 be 'type' or some intermediate type. For now, we expect S5 to be a type
1455 demotion operation. We also check that S3 and S4 have only one use. */
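/* Illustrative example (editorial addition): a loop whose intermediate
   computations are done in int only because of C integer promotion, and
   which this pattern should narrow back down; the names are made up.

     unsigned char in[64], out[64];

     void
     over_widen (void)
     {
       for (int i = 0; i < 64; i++)
         out[i] = (in[i] << 2) | 3;   # S2: in[i] promoted to int;
                                      # S3, S4: shift and ior done in int;
                                      # S5: the store demotes back to char
     }
 */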
1457 static gimple *
1458 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1459 tree *type_in, tree *type_out)
1461 gimple *stmt = stmts->pop ();
1462 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1463 *use_stmt = NULL;
1464 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1465 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1466 bool first;
1467 tree type = NULL;
1469 first = true;
1470 while (1)
1472 if (!vinfo_for_stmt (stmt)
1473 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1474 return NULL;
1476 new_def_stmt = NULL;
1477 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1478 &op0, &op1, &new_def_stmt,
1479 stmts))
1481 if (first)
1482 return NULL;
1483 else
1484 break;
1487 /* STMT can be performed on a smaller type. Check its uses. */
1488 use_stmt = vect_single_imm_use (stmt);
1489 if (!use_stmt || !is_gimple_assign (use_stmt))
1490 return NULL;
1492 /* Create pattern statement for STMT. */
1493 vectype = get_vectype_for_scalar_type (new_type);
1494 if (!vectype)
1495 return NULL;
1497 /* We want to collect all the statements for which we create pattern
1498 statements, except for the case when the last statement in the
1499 sequence doesn't have a corresponding pattern statement. In such
1500 case we associate the last pattern statement with the last statement
1501 in the sequence. Therefore, we only add the original statement to
1502 the list if we know that it is not the last. */
1503 if (prev_stmt)
1504 stmts->safe_push (prev_stmt);
1506 var = vect_recog_temp_ssa_var (new_type, NULL);
1507 pattern_stmt
1508 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1509 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1510 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1512 if (dump_enabled_p ())
1514 dump_printf_loc (MSG_NOTE, vect_location,
1515 "created pattern stmt: ");
1516 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1519 type = gimple_expr_type (stmt);
1520 prev_stmt = stmt;
1521 stmt = use_stmt;
1523 first = false;
1526 /* We got a sequence. We expect it to end with a type demotion operation.
1527 Otherwise, we quit (for now). There are three possible cases: the
1528 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1529 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1530 NEW_TYPE differs (we create a new conversion statement). */
1531 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1533 use_lhs = gimple_assign_lhs (use_stmt);
1534 use_type = TREE_TYPE (use_lhs);
1535 /* Support only type demotion or signedness change. */
1536 if (!INTEGRAL_TYPE_P (use_type)
1537 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1538 return NULL;
1540 /* Check that NEW_TYPE is not bigger than the conversion result. */
1541 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1542 return NULL;
1544 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1545 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1547 /* Create NEW_TYPE->USE_TYPE conversion. */
1548 new_oprnd = make_ssa_name (use_type);
1549 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1550 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1552 *type_in = get_vectype_for_scalar_type (new_type);
1553 *type_out = get_vectype_for_scalar_type (use_type);
1555 /* We created a pattern statement for the last statement in the
1556 sequence, so we don't need to associate it with the pattern
1557 statement created for PREV_STMT. Therefore, we add PREV_STMT
1558 to the list in order to mark it later in vect_pattern_recog_1. */
1559 if (prev_stmt)
1560 stmts->safe_push (prev_stmt);
1562 else
1564 if (prev_stmt)
1565 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1566 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1568 *type_in = vectype;
1569 *type_out = NULL_TREE;
1572 stmts->safe_push (use_stmt);
1574 else
1575 /* TODO: support general case, create a conversion to the correct type. */
1576 return NULL;
1578 /* Pattern detected. */
1579 if (dump_enabled_p ())
1581 dump_printf_loc (MSG_NOTE, vect_location,
1582 "vect_recog_over_widening_pattern: detected: ");
1583 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1586 return pattern_stmt;
1589 /* Detect widening shift pattern:
1591 type a_t;
1592 TYPE a_T, res_T;
1594 S1 a_t = ;
1595 S2 a_T = (TYPE) a_t;
1596 S3 res_T = a_T << CONST;
1598 where type 'TYPE' is at least double the size of type 'type'.
1600 Also detect cases where the shift result is immediately converted
1601 to another type 'result_type' that is no larger in size than 'TYPE'.
1602 In those cases we perform a widen-shift that directly results in
1603 'result_type', to avoid a possible over-widening situation:
1605 type a_t;
1606 TYPE a_T, res_T;
1607 result_type res_result;
1609 S1 a_t = ;
1610 S2 a_T = (TYPE) a_t;
1611 S3 res_T = a_T << CONST;
1612 S4 res_result = (result_type) res_T;
1613 '--> res_result' = a_t w<< CONST;
1615 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1616 create an additional pattern stmt for S2 to create a variable of an
1617 intermediate type, and perform widen-shift on the intermediate type:
1619 type a_t;
1620 interm_type a_it;
1621 TYPE a_T, res_T, res_T';
1623 S1 a_t = ;
1624 S2 a_T = (TYPE) a_t;
1625 '--> a_it = (interm_type) a_t;
1626 S3 res_T = a_T << CONST;
1627 '--> res_T' = a_it <<* CONST;
1629 Input/Output:
1631 * STMTS: Contains a stmt from which the pattern search begins.
1632 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1633 in STMTS. When an intermediate type is used and a pattern statement is
1634 created for S2, we also put S2 here (before S3).
1636 Output:
1638 * TYPE_IN: The type of the input arguments to the pattern.
1640 * TYPE_OUT: The type of the output of this pattern.
1642 * Return value: A new stmt that will be used to replace the sequence of
1643 stmts that constitute the pattern. In this case it will be:
1644 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
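/* Illustrative example (editorial addition): a scalar loop matching the
   widening-shift idiom, assuming 16-bit inputs shifted into a 32-bit
   result; the names are made up.

     short in[64];
     int out[64];

     void
     widen_shift (void)
     {
       for (int i = 0; i < 64; i++)
         out[i] = in[i] << 7;   # S2, S3: widen then shift; becomes
                                # WIDEN_LSHIFT_EXPR <in_t, 7>
     }
 */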
1646 static gimple *
1647 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1648 tree *type_in, tree *type_out)
1650 gimple *last_stmt = stmts->pop ();
1651 gimple *def_stmt0;
1652 tree oprnd0, oprnd1;
1653 tree type, half_type0;
1654 gimple *pattern_stmt;
1655 tree vectype, vectype_out = NULL_TREE;
1656 tree var;
1657 enum tree_code dummy_code;
1658 int dummy_int;
1659 vec<tree> dummy_vec;
1660 gimple *use_stmt;
1661 bool promotion;
1663 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1664 return NULL;
1666 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1667 return NULL;
1669 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1670 return NULL;
1672 oprnd0 = gimple_assign_rhs1 (last_stmt);
1673 oprnd1 = gimple_assign_rhs2 (last_stmt);
1674 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1675 return NULL;
1677 /* Check operand 0: it has to be defined by a type promotion. */
1678 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1679 &promotion)
1680 || !promotion)
1681 return NULL;
1683 /* Check operand 1: has to be positive. We check that it fits the type
1684 in vect_handle_widen_op_by_const (). */
1685 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1686 return NULL;
1688 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1689 type = gimple_expr_type (last_stmt);
1691 /* Check for subsequent conversion to another type. */
1692 use_stmt = vect_single_imm_use (last_stmt);
1693 if (use_stmt && is_gimple_assign (use_stmt)
1694 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1695 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1697 tree use_lhs = gimple_assign_lhs (use_stmt);
1698 tree use_type = TREE_TYPE (use_lhs);
1700 if (INTEGRAL_TYPE_P (use_type)
1701 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1703 last_stmt = use_stmt;
1704 type = use_type;
1708 /* Check if this is a widening operation. */
1709 gimple *wstmt = NULL;
1710 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1711 &oprnd0, &wstmt,
1712 type, &half_type0, def_stmt0))
1713 return NULL;
1715 /* Pattern detected. */
1716 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_NOTE, vect_location,
1718 "vect_recog_widen_shift_pattern: detected:\n");
1720 /* Check target support. */
1721 vectype = get_vectype_for_scalar_type (half_type0);
1722 vectype_out = get_vectype_for_scalar_type (type);
1724 if (!vectype
1725 || !vectype_out
1726 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1727 vectype_out, vectype,
1728 &dummy_code, &dummy_code,
1729 &dummy_int, &dummy_vec))
1730 return NULL;
1732 *type_in = vectype;
1733 *type_out = vectype_out;
1735 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1736 var = vect_recog_temp_ssa_var (type, NULL);
1737 pattern_stmt =
1738 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1739 if (wstmt)
1741 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1742 new_pattern_def_seq (stmt_vinfo, wstmt);
1743 stmt_vec_info new_stmt_info
1744 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1745 set_vinfo_for_stmt (wstmt, new_stmt_info);
1746 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1749 if (dump_enabled_p ())
1750 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1752 stmts->safe_push (last_stmt);
1753 return pattern_stmt;
1756 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1758 type a_t, b_t, c_t;
1760 S0 a_t = b_t r<< c_t;
1762 Input/Output:
1764 * STMTS: Contains a stmt from which the pattern search begins,
1765 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1766 with a sequence:
1768 S1 d_t = -c_t;
1769 S2 e_t = d_t & (B - 1);
1770 S3 f_t = b_t << c_t;
1771 S4 g_t = b_t >> e_t;
1772 S0 a_t = f_t | g_t;
1774 where B is element bitsize of type.
1776 Output:
1778 * TYPE_IN: The type of the input arguments to the pattern.
1780 * TYPE_OUT: The type of the output of this pattern.
1782 * Return value: A new stmt that will be used to replace the rotate
1783 S0 stmt. */
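/* Illustrative example (editorial addition): source code that typically
   reaches this pattern as a rotate, assuming earlier folding has already
   turned the shift/or idiom into r<<; the names are made up.

     unsigned int x[64];

     void
     rotate_left (unsigned int amt)
     {
       for (int i = 0; i < 64; i++)
         x[i] = (x[i] << amt) | (x[i] >> (-amt & 31));   # folded to
                                                         # x[i] r<< amt
     }

   If the target has no vector rotate but does have vector shifts, the
   rotate is expanded back into the S1-S4 sequence above.  */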
1785 static gimple *
1786 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1788 gimple *last_stmt = stmts->pop ();
1789 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1790 gimple *pattern_stmt, *def_stmt;
1791 enum tree_code rhs_code;
1792 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1793 vec_info *vinfo = stmt_vinfo->vinfo;
1794 enum vect_def_type dt;
1795 optab optab1, optab2;
1796 edge ext_def = NULL;
1798 if (!is_gimple_assign (last_stmt))
1799 return NULL;
1801 rhs_code = gimple_assign_rhs_code (last_stmt);
1802 switch (rhs_code)
1804 case LROTATE_EXPR:
1805 case RROTATE_EXPR:
1806 break;
1807 default:
1808 return NULL;
1811 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1812 return NULL;
1814 lhs = gimple_assign_lhs (last_stmt);
1815 oprnd0 = gimple_assign_rhs1 (last_stmt);
1816 type = TREE_TYPE (oprnd0);
1817 oprnd1 = gimple_assign_rhs2 (last_stmt);
1818 if (TREE_CODE (oprnd0) != SSA_NAME
1819 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1820 || !INTEGRAL_TYPE_P (type)
1821 || !TYPE_UNSIGNED (type))
1822 return NULL;
1824 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1825 return NULL;
1827 if (dt != vect_internal_def
1828 && dt != vect_constant_def
1829 && dt != vect_external_def)
1830 return NULL;
1832 vectype = get_vectype_for_scalar_type (type);
1833 if (vectype == NULL_TREE)
1834 return NULL;
1836 /* If vector/vector or vector/scalar rotate is supported by the target,
1837 don't do anything here. */
1838 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1839 if (optab1
1840 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1841 return NULL;
1843 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1845 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1846 if (optab2
1847 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1848 return NULL;
1851 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1852 don't do anything here either. */
1853 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1854 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1855 if (!optab1
1856 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1857 || !optab2
1858 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1860 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1861 return NULL;
1862 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1863 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1864 if (!optab1
1865 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1866 || !optab2
1867 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1868 return NULL;
1871 *type_in = vectype;
1872 *type_out = vectype;
1873 if (*type_in == NULL_TREE)
1874 return NULL;
1876 if (dt == vect_external_def
1877 && TREE_CODE (oprnd1) == SSA_NAME
1878 && is_a <loop_vec_info> (vinfo))
1880 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1881 ext_def = loop_preheader_edge (loop);
1882 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1884 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1885 if (bb == NULL
1886 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1887 ext_def = NULL;
1891 def = NULL_TREE;
1892 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1893 if (TREE_CODE (oprnd1) == INTEGER_CST
1894 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1895 def = oprnd1;
1896 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1898 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1899 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1900 && TYPE_PRECISION (TREE_TYPE (rhs1))
1901 == TYPE_PRECISION (type))
1902 def = rhs1;
1905 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1906 if (def == NULL_TREE)
1908 def = vect_recog_temp_ssa_var (type, NULL);
1909 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1910 if (ext_def)
1912 basic_block new_bb
1913 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1914 gcc_assert (!new_bb);
1916 else
1917 append_pattern_def_seq (stmt_vinfo, def_stmt);
1919 stype = TREE_TYPE (def);
1920 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1922 if (TREE_CODE (def) == INTEGER_CST)
1924 if (!tree_fits_uhwi_p (def)
1925 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1926 || integer_zerop (def))
1927 return NULL;
1928 def2 = build_int_cst (stype,
1929 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1931 else
1933 tree vecstype = get_vectype_for_scalar_type (stype);
1934 stmt_vec_info def_stmt_vinfo;
1936 if (vecstype == NULL_TREE)
1937 return NULL;
1938 def2 = vect_recog_temp_ssa_var (stype, NULL);
1939 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1940 if (ext_def)
1942 basic_block new_bb
1943 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1944 gcc_assert (!new_bb);
1946 else
1948 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1949 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1950 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1951 append_pattern_def_seq (stmt_vinfo, def_stmt);
1954 def2 = vect_recog_temp_ssa_var (stype, NULL);
1955 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
1956 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1957 gimple_assign_lhs (def_stmt), mask);
1958 if (ext_def)
1960 basic_block new_bb
1961 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1962 gcc_assert (!new_bb);
1964 else
1966 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1967 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1968 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1969 append_pattern_def_seq (stmt_vinfo, def_stmt);
1973 var1 = vect_recog_temp_ssa_var (type, NULL);
1974 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1975 ? LSHIFT_EXPR : RSHIFT_EXPR,
1976 oprnd0, def);
1977 append_pattern_def_seq (stmt_vinfo, def_stmt);
1979 var2 = vect_recog_temp_ssa_var (type, NULL);
1980 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1981 ? RSHIFT_EXPR : LSHIFT_EXPR,
1982 oprnd0, def2);
1983 append_pattern_def_seq (stmt_vinfo, def_stmt);
1985 /* Pattern detected. */
1986 if (dump_enabled_p ())
1987 dump_printf_loc (MSG_NOTE, vect_location,
1988 "vect_recog_rotate_pattern: detected:\n");
1990 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1991 var = vect_recog_temp_ssa_var (type, NULL);
1992 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1994 if (dump_enabled_p ())
1995 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1997 stmts->safe_push (last_stmt);
1998 return pattern_stmt;
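/* Illustrative sketch (editorial addition, not part of this file): the
   scalar equivalent of the S1-S4/S0 sequence built above, for a 32-bit
   unsigned element.  Masking the negated count with B - 1 keeps both
   shift counts below the element width, so the expansion stays well
   defined even for a rotate count of zero, where the naive
   b << c | b >> (32 - c) form would shift by 32.  The helper name is
   made up for the example.  */

static inline unsigned int
rotl32_expanded_example (unsigned int b_t, unsigned int c_t)
{
  unsigned int d_t = -c_t;        /* S1 */
  unsigned int e_t = d_t & 31;    /* S2: B - 1 for a 32-bit element */
  unsigned int f_t = b_t << c_t;  /* S3: rotate counts are < B by definition */
  unsigned int g_t = b_t >> e_t;  /* S4 */
  return f_t | g_t;               /* S0 */
}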
2001 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2002 vectorized:
2004 type a_t;
2005 TYPE b_T, res_T;
2007 S1 a_t = ;
2008 S2 b_T = ;
2009 S3 res_T = b_T op a_t;
2011 where 'TYPE' is a type with a different size than 'type',
2012 and op is <<, >> or rotate.
2014 Also detect cases:
2016 type a_t;
2017 TYPE b_T, c_T, res_T;
2019 S0 c_T = ;
2020 S1 a_t = (type) c_T;
2021 S2 b_T = ;
2022 S3 res_T = b_T op a_t;
2024 Input/Output:
2026 * STMTS: Contains a stmt from which the pattern search begins,
2027 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2028 with a shift/rotate which has same type on both operands, in the
2029 second case just b_T op c_T, in the first case with added cast
2030 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2032 Output:
2034 * TYPE_IN: The type of the input arguments to the pattern.
2036 * TYPE_OUT: The type of the output of this pattern.
2038 * Return value: A new stmt that will be used to replace the shift/rotate
2039 S3 stmt. */
2041 static gimple *
2042 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2043 tree *type_in, tree *type_out)
2045 gimple *last_stmt = stmts->pop ();
2046 tree oprnd0, oprnd1, lhs, var;
2047 gimple *pattern_stmt, *def_stmt;
2048 enum tree_code rhs_code;
2049 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2050 vec_info *vinfo = stmt_vinfo->vinfo;
2051 enum vect_def_type dt;
2053 if (!is_gimple_assign (last_stmt))
2054 return NULL;
2056 rhs_code = gimple_assign_rhs_code (last_stmt);
2057 switch (rhs_code)
2059 case LSHIFT_EXPR:
2060 case RSHIFT_EXPR:
2061 case LROTATE_EXPR:
2062 case RROTATE_EXPR:
2063 break;
2064 default:
2065 return NULL;
2068 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2069 return NULL;
2071 lhs = gimple_assign_lhs (last_stmt);
2072 oprnd0 = gimple_assign_rhs1 (last_stmt);
2073 oprnd1 = gimple_assign_rhs2 (last_stmt);
2074 if (TREE_CODE (oprnd0) != SSA_NAME
2075 || TREE_CODE (oprnd1) != SSA_NAME
2076 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2077 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2078 || TYPE_PRECISION (TREE_TYPE (lhs))
2079 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2080 return NULL;
2082 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2083 return NULL;
2085 if (dt != vect_internal_def)
2086 return NULL;
2088 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2089 *type_out = *type_in;
2090 if (*type_in == NULL_TREE)
2091 return NULL;
2093 tree def = NULL_TREE;
2094 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2095 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2097 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2098 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2099 && TYPE_PRECISION (TREE_TYPE (rhs1))
2100 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2102 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2103 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2104 def = rhs1;
2105 else
2107 tree mask
2108 = build_low_bits_mask (TREE_TYPE (rhs1),
2109 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2110 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2111 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2112 new_pattern_def_seq (stmt_vinfo, def_stmt);
2117 if (def == NULL_TREE)
2119 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2120 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2121 new_pattern_def_seq (stmt_vinfo, def_stmt);
2124 /* Pattern detected. */
2125 if (dump_enabled_p ())
2126 dump_printf_loc (MSG_NOTE, vect_location,
2127 "vect_recog_vector_vector_shift_pattern: detected:\n");
2129 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2130 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2131 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2133 if (dump_enabled_p ())
2134 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2136 stmts->safe_push (last_stmt);
2137 return pattern_stmt;
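/* Illustrative sketch (editorial addition, not part of this file): the
   scalar shape the pattern above matches -- a shift whose count has a
   narrower type than the shifted operand -- and the same-type form it is
   rewritten to, with the widening of the count carried by a separate
   cast statement in the pattern def sequence.  The helper name is made
   up for the example.  */

static inline unsigned long long
vector_shift_count_example (unsigned long long b_T, unsigned char a_t)
{
  /* Matched:    res_T = b_T >> a_t;                 (S3, mixed modes)
     Rewritten:  tmp_T = (unsigned long long) a_t;   (pattern def seq)
                 res_T = b_T >> tmp_T;               (replacement stmt)  */
  unsigned long long tmp_T = (unsigned long long) a_t;
  return b_T >> tmp_T;
}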
2140 /* Return true iff the target has a vector optab implementing the operation
2141 CODE on type VECTYPE. */
2143 static bool
2144 target_has_vecop_for_code (tree_code code, tree vectype)
2146 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2147 return voptab
2148 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2151 /* Verify that the target has optabs of VECTYPE to perform all the steps
2152 needed by the multiplication-by-immediate synthesis algorithm described by
2153 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2154 present. Return true iff the target supports all the steps. */
2156 static bool
2157 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2158 tree vectype, bool synth_shift_p)
2160 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2161 return false;
2163 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2164 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2166 if (var == negate_variant
2167 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2168 return false;
2170 /* If we must synthesize shifts with additions, make sure that vector
2171 addition is available. */
2172 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2173 return false;
2175 for (int i = 1; i < alg->ops; i++)
2177 switch (alg->op[i])
2179 case alg_shift:
2180 break;
2181 case alg_add_t_m2:
2182 case alg_add_t2_m:
2183 case alg_add_factor:
2184 if (!supports_vplus)
2185 return false;
2186 break;
2187 case alg_sub_t_m2:
2188 case alg_sub_t2_m:
2189 case alg_sub_factor:
2190 if (!supports_vminus)
2191 return false;
2192 break;
2193 case alg_unknown:
2194 case alg_m:
2195 case alg_zero:
2196 case alg_impossible:
2197 return false;
2198 default:
2199 gcc_unreachable ();
2203 return true;
2206 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2207 putting the final result in DEST. Append all statements but the last into
2208 VINFO. Return the last statement. */
2210 static gimple *
2211 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2212 stmt_vec_info vinfo)
2214 HOST_WIDE_INT i;
2215 tree itype = TREE_TYPE (op);
2216 tree prev_res = op;
2217 gcc_assert (amnt >= 0);
2218 for (i = 0; i < amnt; i++)
2220 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2221 : dest;
2222 gimple *stmt
2223 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2224 prev_res = tmp_var;
2225 if (i < amnt - 1)
2226 append_pattern_def_seq (vinfo, stmt);
2227 else
2228 return stmt;
2230 gcc_unreachable ();
2231 return NULL;
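/* Illustrative sketch (editorial addition, not part of this file): what
   synth_lshift_by_additions emits for AMNT == 3 -- one doubling per bit
   of the shift amount, expressed as self-additions so that only a vector
   add is needed.  The helper name is made up for the example.  */

static inline unsigned int
lshift3_by_additions_example (unsigned int op)
{
  unsigned int t1 = op + op;  /* op << 1 */
  unsigned int t2 = t1 + t1;  /* op << 2 */
  return t2 + t2;             /* op << 3 */
}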
2234 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2235 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2236 the process if necessary. Append the resulting assignment statements
2237 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2238 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2239 left shifts using additions. */
2241 static tree
2242 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2243 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2245 if (integer_zerop (op2)
2246 && (code == LSHIFT_EXPR
2247 || code == PLUS_EXPR))
2249 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2250 return op1;
2253 gimple *stmt;
2254 tree itype = TREE_TYPE (op1);
2255 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2257 if (code == LSHIFT_EXPR
2258 && synth_shift_p)
2260 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2261 stmt_vinfo);
2262 append_pattern_def_seq (stmt_vinfo, stmt);
2263 return tmp_var;
2266 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2267 append_pattern_def_seq (stmt_vinfo, stmt);
2268 return tmp_var;
2271 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2272 and simple arithmetic operations to be vectorized. Record the statements
2273 produced in STMT_VINFO and return the last statement in the sequence or
2274 NULL if it's not possible to synthesize such a multiplication.
2275 This function mirrors the behavior of expand_mult_const in expmed.c but
2276 works on tree-ssa form. */
2278 static gimple *
2279 vect_synth_mult_by_constant (tree op, tree val,
2280 stmt_vec_info stmt_vinfo)
2282 tree itype = TREE_TYPE (op);
2283 machine_mode mode = TYPE_MODE (itype);
2284 struct algorithm alg;
2285 mult_variant variant;
2286 if (!tree_fits_shwi_p (val))
2287 return NULL;
2289 /* Multiplication synthesis by shifts, adds and subs can introduce
2290 signed overflow where the original operation didn't. Perform the
2291 operations on an unsigned type and cast back to avoid this.
2292 In the future we may want to relax this for synthesis algorithms
2293 that we can prove do not cause unexpected overflow. */
2294 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2296 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2298 /* Targets that don't support vector shifts but support vector additions
2299 can synthesize shifts that way. */
2300 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2302 HOST_WIDE_INT hwval = tree_to_shwi (val);
2303 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2304 The vectorizer's benefit analysis will decide whether it's beneficial
2305 to do this. */
2306 bool possible = choose_mult_variant (mode, hwval, &alg,
2307 &variant, MAX_COST);
2308 if (!possible)
2309 return NULL;
2311 tree vectype = get_vectype_for_scalar_type (multtype);
2313 if (!vectype
2314 || !target_supports_mult_synth_alg (&alg, variant,
2315 vectype, synth_shift_p))
2316 return NULL;
2318 tree accumulator;
2320 /* Clear out the sequence of statements so we can populate it below. */
2321 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2322 gimple *stmt = NULL;
2324 if (cast_to_unsigned_p)
2326 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2327 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2328 append_pattern_def_seq (stmt_vinfo, stmt);
2329 op = tmp_op;
2332 if (alg.op[0] == alg_zero)
2333 accumulator = build_int_cst (multtype, 0);
2334 else
2335 accumulator = op;
2337 bool needs_fixup = (variant == negate_variant)
2338 || (variant == add_variant);
2340 for (int i = 1; i < alg.ops; i++)
2342 tree shft_log = build_int_cst (multtype, alg.log[i]);
2343 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2344 tree tmp_var = NULL_TREE;
2346 switch (alg.op[i])
2348 case alg_shift:
2349 if (synth_shift_p)
2350 stmt
2351 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2352 stmt_vinfo);
2353 else
2354 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2355 shft_log);
2356 break;
2357 case alg_add_t_m2:
2358 tmp_var
2359 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2360 stmt_vinfo, synth_shift_p);
2361 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2362 tmp_var);
2363 break;
2364 case alg_sub_t_m2:
2365 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2366 shft_log, stmt_vinfo,
2367 synth_shift_p);
2368 /* In some algorithms the first step involves zeroing the
2369 accumulator. If subtracting from such an accumulator
2370 just emit the negation directly. */
2371 if (integer_zerop (accumulator))
2372 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2373 else
2374 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2375 tmp_var);
2376 break;
2377 case alg_add_t2_m:
2378 tmp_var
2379 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2380 stmt_vinfo, synth_shift_p);
2381 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2382 break;
2383 case alg_sub_t2_m:
2384 tmp_var
2385 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2386 stmt_vinfo, synth_shift_p);
2387 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2388 break;
2389 case alg_add_factor:
2390 tmp_var
2391 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2392 stmt_vinfo, synth_shift_p);
2393 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2394 tmp_var);
2395 break;
2396 case alg_sub_factor:
2397 tmp_var
2398 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2399 stmt_vinfo, synth_shift_p);
2400 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2401 accumulator);
2402 break;
2403 default:
2404 gcc_unreachable ();
2406 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2407 but rather return it directly. */
2409 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2410 append_pattern_def_seq (stmt_vinfo, stmt);
2411 accumulator = accum_tmp;
2413 if (variant == negate_variant)
2415 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2416 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2417 accumulator = accum_tmp;
2418 if (cast_to_unsigned_p)
2419 append_pattern_def_seq (stmt_vinfo, stmt);
2421 else if (variant == add_variant)
2423 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2424 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2425 accumulator = accum_tmp;
2426 if (cast_to_unsigned_p)
2427 append_pattern_def_seq (stmt_vinfo, stmt);
2429 /* Move back to a signed if needed. */
2430 if (cast_to_unsigned_p)
2432 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2433 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2436 return stmt;
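/* Illustrative sketch (editorial addition, not part of this file): one
   shift-and-add decomposition of a multiplication by the constant 9, of
   the kind the synthesis above can produce via choose_mult_variant when
   the target has no vector multiply.  The exact algorithm chosen depends
   on the constant and the target; the helper name is made up.  */

static inline unsigned int
mult_by_9_example (unsigned int op)
{
  unsigned int t = op << 3;  /* 8 * op */
  return t + op;             /* 9 * op */
}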
2439 /* Detect a multiplication by a constant and convert it into a sequence of
2440 shifts, additions, subtractions and negations. We reuse the
2441 choose_mult_variant algorithms from expmed.c.
2443 Input/Output:
2445 STMTS: Contains a stmt from which the pattern search begins,
2446 i.e. the mult stmt.
2448 Output:
2450 * TYPE_IN: The type of the input arguments to the pattern.
2452 * TYPE_OUT: The type of the output of this pattern.
2454 * Return value: A new stmt that will be used to replace
2455 the multiplication. */
2457 static gimple *
2458 vect_recog_mult_pattern (vec<gimple *> *stmts,
2459 tree *type_in, tree *type_out)
2461 gimple *last_stmt = stmts->pop ();
2462 tree oprnd0, oprnd1, vectype, itype;
2463 gimple *pattern_stmt;
2464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2466 if (!is_gimple_assign (last_stmt))
2467 return NULL;
2469 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2470 return NULL;
2472 oprnd0 = gimple_assign_rhs1 (last_stmt);
2473 oprnd1 = gimple_assign_rhs2 (last_stmt);
2474 itype = TREE_TYPE (oprnd0);
2476 if (TREE_CODE (oprnd0) != SSA_NAME
2477 || TREE_CODE (oprnd1) != INTEGER_CST
2478 || !INTEGRAL_TYPE_P (itype)
2479 || !type_has_mode_precision_p (itype))
2480 return NULL;
2482 vectype = get_vectype_for_scalar_type (itype);
2483 if (vectype == NULL_TREE)
2484 return NULL;
2486 /* If the target can handle vectorized multiplication natively,
2487 don't attempt to optimize this. */
2488 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2489 if (mul_optab != unknown_optab)
2491 machine_mode vec_mode = TYPE_MODE (vectype);
2492 int icode = (int) optab_handler (mul_optab, vec_mode);
2493 if (icode != CODE_FOR_nothing)
2494 return NULL;
2497 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2498 if (!pattern_stmt)
2499 return NULL;
2501 /* Pattern detected. */
2502 if (dump_enabled_p ())
2503 dump_printf_loc (MSG_NOTE, vect_location,
2504 "vect_recog_mult_pattern: detected:\n");
2506 if (dump_enabled_p ())
2507 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2508 pattern_stmt, 0);
2510 stmts->safe_push (last_stmt);
2511 *type_in = vectype;
2512 *type_out = vectype;
2514 return pattern_stmt;
2517 /* Detect a signed division by a constant that wouldn't otherwise be
2518 vectorized:
2520 type a_t, b_t;
2522 S1 a_t = b_t / N;
2524 where type 'type' is an integral type and N is a constant.
2526 Similarly handle modulo by a constant:
2528 S4 a_t = b_t % N;
2530 Input/Output:
2532 * STMTS: Contains a stmt from which the pattern search begins,
2533 i.e. the division stmt. S1 is replaced, if N is a power
2534 of two constant and the type is signed, by:
2535 S3 y_t = b_t < 0 ? N - 1 : 0;
2536 S2 x_t = b_t + y_t;
2537 S1' a_t = x_t >> log2 (N);
2539 S4 is replaced, if N is a power of two constant and
2540 the type is signed, by (where *_T temporaries have unsigned type):
2541 S9 y_T = b_t < 0 ? -1U : 0U;
2542 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2543 S7 z_t = (type) z_T;
2544 S6 w_t = b_t + z_t;
2545 S5 x_t = w_t & (N - 1);
2546 S4' a_t = x_t - z_t;
2548 Output:
2550 * TYPE_IN: The type of the input arguments to the pattern.
2552 * TYPE_OUT: The type of the output of this pattern.
2554 * Return value: A new stmt that will be used to replace the division
2555 S1 or modulo S4 stmt. */
2557 static gimple *
2558 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2559 tree *type_in, tree *type_out)
2561 gimple *last_stmt = stmts->pop ();
2562 tree oprnd0, oprnd1, vectype, itype, cond;
2563 gimple *pattern_stmt, *def_stmt;
2564 enum tree_code rhs_code;
2565 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2566 vec_info *vinfo = stmt_vinfo->vinfo;
2567 optab optab;
2568 tree q;
2569 int dummy_int, prec;
2570 stmt_vec_info def_stmt_vinfo;
2572 if (!is_gimple_assign (last_stmt))
2573 return NULL;
2575 rhs_code = gimple_assign_rhs_code (last_stmt);
2576 switch (rhs_code)
2578 case TRUNC_DIV_EXPR:
2579 case TRUNC_MOD_EXPR:
2580 break;
2581 default:
2582 return NULL;
2585 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2586 return NULL;
2588 oprnd0 = gimple_assign_rhs1 (last_stmt);
2589 oprnd1 = gimple_assign_rhs2 (last_stmt);
2590 itype = TREE_TYPE (oprnd0);
2591 if (TREE_CODE (oprnd0) != SSA_NAME
2592 || TREE_CODE (oprnd1) != INTEGER_CST
2593 || TREE_CODE (itype) != INTEGER_TYPE
2594 || !type_has_mode_precision_p (itype))
2595 return NULL;
2597 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2598 vectype = get_vectype_for_scalar_type (itype);
2599 if (vectype == NULL_TREE)
2600 return NULL;
2602 /* If the target can handle vectorized division or modulo natively,
2603 don't attempt to optimize this. */
2604 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2605 if (optab != unknown_optab)
2607 machine_mode vec_mode = TYPE_MODE (vectype);
2608 int icode = (int) optab_handler (optab, vec_mode);
2609 if (icode != CODE_FOR_nothing)
2610 return NULL;
2613 prec = TYPE_PRECISION (itype);
2614 if (integer_pow2p (oprnd1))
2616 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2617 return NULL;
2619 /* Pattern detected. */
2620 if (dump_enabled_p ())
2621 dump_printf_loc (MSG_NOTE, vect_location,
2622 "vect_recog_divmod_pattern: detected:\n");
2624 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2625 build_int_cst (itype, 0));
2626 if (rhs_code == TRUNC_DIV_EXPR)
2628 tree var = vect_recog_temp_ssa_var (itype, NULL);
2629 tree shift;
2630 def_stmt
2631 = gimple_build_assign (var, COND_EXPR, cond,
2632 fold_build2 (MINUS_EXPR, itype, oprnd1,
2633 build_int_cst (itype, 1)),
2634 build_int_cst (itype, 0));
2635 new_pattern_def_seq (stmt_vinfo, def_stmt);
2636 var = vect_recog_temp_ssa_var (itype, NULL);
2637 def_stmt
2638 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2639 gimple_assign_lhs (def_stmt));
2640 append_pattern_def_seq (stmt_vinfo, def_stmt);
2642 shift = build_int_cst (itype, tree_log2 (oprnd1));
2643 pattern_stmt
2644 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2645 RSHIFT_EXPR, var, shift);
2647 else
2649 tree signmask;
2650 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2651 if (compare_tree_int (oprnd1, 2) == 0)
2653 signmask = vect_recog_temp_ssa_var (itype, NULL);
2654 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2655 build_int_cst (itype, 1),
2656 build_int_cst (itype, 0));
2657 append_pattern_def_seq (stmt_vinfo, def_stmt);
2659 else
2661 tree utype
2662 = build_nonstandard_integer_type (prec, 1);
2663 tree vecutype = get_vectype_for_scalar_type (utype);
2664 tree shift
2665 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2666 - tree_log2 (oprnd1));
2667 tree var = vect_recog_temp_ssa_var (utype, NULL);
2669 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2670 build_int_cst (utype, -1),
2671 build_int_cst (utype, 0));
2672 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2673 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2674 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2675 append_pattern_def_seq (stmt_vinfo, def_stmt);
2676 var = vect_recog_temp_ssa_var (utype, NULL);
2677 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2678 gimple_assign_lhs (def_stmt),
2679 shift);
2680 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2681 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2682 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2683 append_pattern_def_seq (stmt_vinfo, def_stmt);
2684 signmask = vect_recog_temp_ssa_var (itype, NULL);
2685 def_stmt
2686 = gimple_build_assign (signmask, NOP_EXPR, var);
2687 append_pattern_def_seq (stmt_vinfo, def_stmt);
2689 def_stmt
2690 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2691 PLUS_EXPR, oprnd0, signmask);
2692 append_pattern_def_seq (stmt_vinfo, def_stmt);
2693 def_stmt
2694 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2695 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2696 fold_build2 (MINUS_EXPR, itype, oprnd1,
2697 build_int_cst (itype, 1)));
2698 append_pattern_def_seq (stmt_vinfo, def_stmt);
2700 pattern_stmt
2701 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2702 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2703 signmask);
2706 if (dump_enabled_p ())
2707 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2708 0);
2710 stmts->safe_push (last_stmt);
2712 *type_in = vectype;
2713 *type_out = vectype;
2714 return pattern_stmt;
2717 if (prec > HOST_BITS_PER_WIDE_INT
2718 || integer_zerop (oprnd1))
2719 return NULL;
2721 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2722 return NULL;
2724 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2726 if (TYPE_UNSIGNED (itype))
2728 unsigned HOST_WIDE_INT mh, ml;
2729 int pre_shift, post_shift;
2730 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2731 & GET_MODE_MASK (itype_mode));
2732 tree t1, t2, t3, t4;
2734 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2735 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2736 return NULL;
2738 /* Find a suitable multiplier and right shift count
2739 instead of multiplying with D. */
2740 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2742 /* If the suggested multiplier is more than SIZE bits, we can do better
2743 for even divisors, using an initial right shift. */
2744 if (mh != 0 && (d & 1) == 0)
2746 pre_shift = ctz_or_zero (d);
2747 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2748 &ml, &post_shift, &dummy_int);
2749 gcc_assert (!mh);
2751 else
2752 pre_shift = 0;
2754 if (mh != 0)
2756 if (post_shift - 1 >= prec)
2757 return NULL;
2759 /* t1 = oprnd0 h* ml;
2760 t2 = oprnd0 - t1;
2761 t3 = t2 >> 1;
2762 t4 = t1 + t3;
2763 q = t4 >> (post_shift - 1); */
2764 t1 = vect_recog_temp_ssa_var (itype, NULL);
2765 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2766 build_int_cst (itype, ml));
2767 append_pattern_def_seq (stmt_vinfo, def_stmt);
2769 t2 = vect_recog_temp_ssa_var (itype, NULL);
2770 def_stmt
2771 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2772 append_pattern_def_seq (stmt_vinfo, def_stmt);
2774 t3 = vect_recog_temp_ssa_var (itype, NULL);
2775 def_stmt
2776 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2777 append_pattern_def_seq (stmt_vinfo, def_stmt);
2779 t4 = vect_recog_temp_ssa_var (itype, NULL);
2780 def_stmt
2781 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2783 if (post_shift != 1)
2785 append_pattern_def_seq (stmt_vinfo, def_stmt);
2787 q = vect_recog_temp_ssa_var (itype, NULL);
2788 pattern_stmt
2789 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2790 build_int_cst (itype, post_shift - 1));
2792 else
2794 q = t4;
2795 pattern_stmt = def_stmt;
2798 else
2800 if (pre_shift >= prec || post_shift >= prec)
2801 return NULL;
2803 /* t1 = oprnd0 >> pre_shift;
2804 t2 = t1 h* ml;
2805 q = t2 >> post_shift; */
2806 if (pre_shift)
2808 t1 = vect_recog_temp_ssa_var (itype, NULL);
2809 def_stmt
2810 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2811 build_int_cst (NULL, pre_shift));
2812 append_pattern_def_seq (stmt_vinfo, def_stmt);
2814 else
2815 t1 = oprnd0;
2817 t2 = vect_recog_temp_ssa_var (itype, NULL);
2818 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2819 build_int_cst (itype, ml));
2821 if (post_shift)
2823 append_pattern_def_seq (stmt_vinfo, def_stmt);
2825 q = vect_recog_temp_ssa_var (itype, NULL);
2826 def_stmt
2827 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2828 build_int_cst (itype, post_shift));
2830 else
2831 q = t2;
2833 pattern_stmt = def_stmt;
2836 else
2838 unsigned HOST_WIDE_INT ml;
2839 int post_shift;
2840 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2841 unsigned HOST_WIDE_INT abs_d;
2842 bool add = false;
2843 tree t1, t2, t3, t4;
2845 /* Give up for -1. */
2846 if (d == -1)
2847 return NULL;
2849 /* Since d might be INT_MIN, we have to cast to
2850 unsigned HOST_WIDE_INT before negating to avoid
2851 undefined signed overflow. */
2852 abs_d = (d >= 0
2853 ? (unsigned HOST_WIDE_INT) d
2854 : - (unsigned HOST_WIDE_INT) d);
2856 /* n rem d = n rem -d */
2857 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2859 d = abs_d;
2860 oprnd1 = build_int_cst (itype, abs_d);
2862 else if (HOST_BITS_PER_WIDE_INT >= prec
2863 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2864 /* This case is not handled correctly below. */
2865 return NULL;
2867 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2868 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2870 add = true;
2871 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2873 if (post_shift >= prec)
2874 return NULL;
2876 /* t1 = oprnd0 h* ml; */
2877 t1 = vect_recog_temp_ssa_var (itype, NULL);
2878 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2879 build_int_cst (itype, ml));
2881 if (add)
2883 /* t2 = t1 + oprnd0; */
2884 append_pattern_def_seq (stmt_vinfo, def_stmt);
2885 t2 = vect_recog_temp_ssa_var (itype, NULL);
2886 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2888 else
2889 t2 = t1;
2891 if (post_shift)
2893 /* t3 = t2 >> post_shift; */
2894 append_pattern_def_seq (stmt_vinfo, def_stmt);
2895 t3 = vect_recog_temp_ssa_var (itype, NULL);
2896 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2897 build_int_cst (itype, post_shift));
2899 else
2900 t3 = t2;
2902 wide_int oprnd0_min, oprnd0_max;
2903 int msb = 1;
2904 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2906 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2907 msb = 0;
2908 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2909 msb = -1;
2912 if (msb == 0 && d >= 0)
2914 /* q = t3; */
2915 q = t3;
2916 pattern_stmt = def_stmt;
2918 else
2920 /* t4 = oprnd0 >> (prec - 1);
2921 or if we know from VRP that oprnd0 >= 0
2922 t4 = 0;
2923 or if we know from VRP that oprnd0 < 0
2924 t4 = -1; */
2925 append_pattern_def_seq (stmt_vinfo, def_stmt);
2926 t4 = vect_recog_temp_ssa_var (itype, NULL);
2927 if (msb != 1)
2928 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2929 build_int_cst (itype, msb));
2930 else
2931 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2932 build_int_cst (itype, prec - 1));
2933 append_pattern_def_seq (stmt_vinfo, def_stmt);
2935 /* q = t3 - t4; or q = t4 - t3; */
2936 q = vect_recog_temp_ssa_var (itype, NULL);
2937 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2938 d < 0 ? t3 : t4);
2942 if (rhs_code == TRUNC_MOD_EXPR)
2944 tree r, t1;
2946 /* We divided. Now finish by:
2947 t1 = q * oprnd1;
2948 r = oprnd0 - t1; */
2949 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2951 t1 = vect_recog_temp_ssa_var (itype, NULL);
2952 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2953 append_pattern_def_seq (stmt_vinfo, def_stmt);
2955 r = vect_recog_temp_ssa_var (itype, NULL);
2956 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2959 /* Pattern detected. */
2960 if (dump_enabled_p ())
2962 dump_printf_loc (MSG_NOTE, vect_location,
2963 "vect_recog_divmod_pattern: detected: ");
2964 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2967 stmts->safe_push (last_stmt);
2969 *type_in = vectype;
2970 *type_out = vectype;
2971 return pattern_stmt;
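/* Illustrative sketches (editorial addition, not part of this file).

   First, the scalar equivalent of the S3/S2/S1' replacement above for a
   signed division by 4: adding N - 1 only for negative dividends makes
   the arithmetic right shift truncate towards zero, as TRUNC_DIV_EXPR
   requires (GIMPLE's RSHIFT_EXPR on a signed type is an arithmetic
   shift).  Second, the shape of the unsigned multiply-highpart path for
   division by 3 in 32 bits, where choose_multiplier typically returns
   ml = 0xAAAAAAAB (ceil (2^33 / 3)) with mh == 0, pre_shift == 0 and
   post_shift == 1.  Both helper names are made up for the examples.  */

static inline int
sdiv_by_4_example (int b_t)
{
  int y_t = b_t < 0 ? 3 : 0;  /* S3: N - 1 */
  int x_t = b_t + y_t;        /* S2 */
  return x_t >> 2;            /* S1': shift by log2 (N) */
}

static inline unsigned int
udiv_by_3_example (unsigned int n)
{
  /* t2 = n h* ml;  q = t2 >> post_shift;  */
  unsigned int t2
    = (unsigned int) (((unsigned long long) n * 0xAAAAAAABu) >> 32);
  return t2 >> 1;
}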
2974 /* Function vect_recog_mixed_size_cond_pattern
2976 Try to find the following pattern:
2978 type x_t, y_t;
2979 TYPE a_T, b_T, c_T;
2980 loop:
2981 S1 a_T = x_t CMP y_t ? b_T : c_T;
2983 where type 'TYPE' is an integral type which has a different size
2984 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2985 than 'type', the constants need to fit into an integer type
2986 with the same width as 'type') or results of conversion from 'type'.
2988 Input:
2990 * LAST_STMT: A stmt from which the pattern search begins.
2992 Output:
2994 * TYPE_IN: The type of the input arguments to the pattern.
2996 * TYPE_OUT: The type of the output of this pattern.
2998 * Return value: A new stmt that will be used to replace the pattern.
2999 Additionally a def_stmt is added.
3001 a_it = x_t CMP y_t ? b_it : c_it;
3002 a_T = (TYPE) a_it; */
3004 static gimple *
3005 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3006 tree *type_out)
3008 gimple *last_stmt = (*stmts)[0];
3009 tree cond_expr, then_clause, else_clause;
3010 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3011 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3012 gimple *pattern_stmt, *def_stmt;
3013 vec_info *vinfo = stmt_vinfo->vinfo;
3014 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3015 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3016 bool promotion;
3017 tree comp_scalar_type;
3019 if (!is_gimple_assign (last_stmt)
3020 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3021 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3022 return NULL;
3024 cond_expr = gimple_assign_rhs1 (last_stmt);
3025 then_clause = gimple_assign_rhs2 (last_stmt);
3026 else_clause = gimple_assign_rhs3 (last_stmt);
3028 if (!COMPARISON_CLASS_P (cond_expr))
3029 return NULL;
3031 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3032 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3033 if (comp_vectype == NULL_TREE)
3034 return NULL;
3036 type = gimple_expr_type (last_stmt);
3037 if (types_compatible_p (type, comp_scalar_type)
3038 || ((TREE_CODE (then_clause) != INTEGER_CST
3039 || TREE_CODE (else_clause) != INTEGER_CST)
3040 && !INTEGRAL_TYPE_P (comp_scalar_type))
3041 || !INTEGRAL_TYPE_P (type))
3042 return NULL;
3044 if ((TREE_CODE (then_clause) != INTEGER_CST
3045 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3046 &def_stmt0, &promotion))
3047 || (TREE_CODE (else_clause) != INTEGER_CST
3048 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3049 &def_stmt1, &promotion)))
3050 return NULL;
3052 if (orig_type0 && orig_type1
3053 && !types_compatible_p (orig_type0, orig_type1))
3054 return NULL;
3056 if (orig_type0)
3058 if (!types_compatible_p (orig_type0, comp_scalar_type))
3059 return NULL;
3060 then_clause = gimple_assign_rhs1 (def_stmt0);
3061 itype = orig_type0;
3064 if (orig_type1)
3066 if (!types_compatible_p (orig_type1, comp_scalar_type))
3067 return NULL;
3068 else_clause = gimple_assign_rhs1 (def_stmt1);
3069 itype = orig_type1;
3073 HOST_WIDE_INT cmp_mode_size
3074 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3076 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3077 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3078 return NULL;
3080 vectype = get_vectype_for_scalar_type (type);
3081 if (vectype == NULL_TREE)
3082 return NULL;
3084 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3085 return NULL;
3087 if (itype == NULL_TREE)
3088 itype = build_nonstandard_integer_type (cmp_mode_size,
3089 TYPE_UNSIGNED (type));
3091 if (itype == NULL_TREE
3092 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3093 return NULL;
3095 vecitype = get_vectype_for_scalar_type (itype);
3096 if (vecitype == NULL_TREE)
3097 return NULL;
3099 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3100 return NULL;
3102 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3104 if ((TREE_CODE (then_clause) == INTEGER_CST
3105 && !int_fits_type_p (then_clause, itype))
3106 || (TREE_CODE (else_clause) == INTEGER_CST
3107 && !int_fits_type_p (else_clause, itype)))
3108 return NULL;
3111 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3112 COND_EXPR, unshare_expr (cond_expr),
3113 fold_convert (itype, then_clause),
3114 fold_convert (itype, else_clause));
3115 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3116 NOP_EXPR, gimple_assign_lhs (def_stmt));
3118 new_pattern_def_seq (stmt_vinfo, def_stmt);
3119 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3120 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3121 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3122 *type_in = vecitype;
3123 *type_out = vectype;
3125 if (dump_enabled_p ())
3126 dump_printf_loc (MSG_NOTE, vect_location,
3127 "vect_recog_mixed_size_cond_pattern: detected:\n");
3129 return pattern_stmt;
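/* Illustrative sketch (editorial addition, not part of this file): the
   kind of scalar source the pattern above targets -- a comparison on a
   narrow type selecting values of a wider type -- and the two-statement
   form it is rewritten to, doing the COND_EXPR in the comparison's width
   and widening afterwards.  The helper name is made up for the example.  */

static inline int
mixed_size_cond_example (signed char x_t, signed char y_t)
{
  /* Matched:    a_T = x_t < y_t ? 5 : 17;
     Rewritten:  a_it = x_t < y_t ? 5 : 17;  (pattern def seq, char width)
                 a_T  = (int) a_it;          (replacement stmt)  */
  signed char a_it = x_t < y_t ? 5 : 17;
  return (int) a_it;
}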
3133 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3134 true if bool VAR can and should be optimized that way. Assume it shouldn't
3135 in case it's a result of a comparison which can be directly vectorized into
3136 a vector comparison. Fills in STMTS with all stmts visited during the
3137 walk. */
3139 static bool
3140 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3142 gimple *def_stmt;
3143 enum vect_def_type dt;
3144 tree rhs1;
3145 enum tree_code rhs_code;
3147 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3148 return false;
3150 if (dt != vect_internal_def)
3151 return false;
3153 if (!is_gimple_assign (def_stmt))
3154 return false;
3156 if (stmts.contains (def_stmt))
3157 return true;
3159 rhs1 = gimple_assign_rhs1 (def_stmt);
3160 rhs_code = gimple_assign_rhs_code (def_stmt);
3161 switch (rhs_code)
3163 case SSA_NAME:
3164 if (! check_bool_pattern (rhs1, vinfo, stmts))
3165 return false;
3166 break;
3168 CASE_CONVERT:
3169 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3170 return false;
3171 if (! check_bool_pattern (rhs1, vinfo, stmts))
3172 return false;
3173 break;
3175 case BIT_NOT_EXPR:
3176 if (! check_bool_pattern (rhs1, vinfo, stmts))
3177 return false;
3178 break;
3180 case BIT_AND_EXPR:
3181 case BIT_IOR_EXPR:
3182 case BIT_XOR_EXPR:
3183 if (! check_bool_pattern (rhs1, vinfo, stmts)
3184 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3185 return false;
3186 break;
3188 default:
3189 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3191 tree vecitype, comp_vectype;
3193 /* If the comparison can throw, then is_gimple_condexpr will be
3194 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3195 if (stmt_could_throw_p (def_stmt))
3196 return false;
3198 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3199 if (comp_vectype == NULL_TREE)
3200 return false;
3202 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3203 if (mask_type
3204 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3205 return false;
3207 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3209 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3210 tree itype
3211 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3212 vecitype = get_vectype_for_scalar_type (itype);
3213 if (vecitype == NULL_TREE)
3214 return false;
3216 else
3217 vecitype = comp_vectype;
3218 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3219 return false;
3221 else
3222 return false;
3223 break;
3226 bool res = stmts.add (def_stmt);
3227 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3228 gcc_assert (!res);
3230 return true;
3234 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3235 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3236 pattern sequence. */
3238 static tree
3239 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3241 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3242 NOP_EXPR, var);
3243 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3244 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3245 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3246 append_pattern_def_seq (stmt_info, cast_stmt);
3247 return gimple_assign_lhs (cast_stmt);
3250 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3251 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3252 type, OUT_TYPE is the desired final integer type of the whole pattern.
3253 STMT_INFO is the info of the pattern root and is where pattern stmts should
3254 be associated with. DEFS is a map of pattern defs. */
3256 static void
3257 adjust_bool_pattern (tree var, tree out_type,
3258 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3260 gimple *stmt = SSA_NAME_DEF_STMT (var);
3261 enum tree_code rhs_code, def_rhs_code;
3262 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3263 location_t loc;
3264 gimple *pattern_stmt, *def_stmt;
3265 tree trueval = NULL_TREE;
3267 rhs1 = gimple_assign_rhs1 (stmt);
3268 rhs2 = gimple_assign_rhs2 (stmt);
3269 rhs_code = gimple_assign_rhs_code (stmt);
3270 loc = gimple_location (stmt);
3271 switch (rhs_code)
3273 case SSA_NAME:
3274 CASE_CONVERT:
3275 irhs1 = *defs.get (rhs1);
3276 itype = TREE_TYPE (irhs1);
3277 pattern_stmt
3278 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3279 SSA_NAME, irhs1);
3280 break;
3282 case BIT_NOT_EXPR:
3283 irhs1 = *defs.get (rhs1);
3284 itype = TREE_TYPE (irhs1);
3285 pattern_stmt
3286 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3287 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3288 break;
3290 case BIT_AND_EXPR:
3291 /* Try to optimize x = y & (a < b ? 1 : 0); into
3292 x = (a < b ? y : 0);
3294 E.g. for:
3295 bool a_b, b_b, c_b;
3296 TYPE d_T;
3298 S1 a_b = x1 CMP1 y1;
3299 S2 b_b = x2 CMP2 y2;
3300 S3 c_b = a_b & b_b;
3301 S4 d_T = (TYPE) c_b;
3303 we would normally emit:
3305 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3306 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3307 S3' c_T = a_T & b_T;
3308 S4' d_T = c_T;
3310 but we can save one stmt by using the
3311 result of one of the COND_EXPRs in the other COND_EXPR and leave
3312 the BIT_AND_EXPR stmt out:
3314 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3315 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3316 S4' f_T = c_T;
3318 At least when VEC_COND_EXPR is implemented using masks,
3319 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3320 computes the comparison mask and ANDs it, in one case with an
3321 all-ones vector, in the other case with a vector register.
3322 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3323 often more expensive. */
3324 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3325 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3326 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3328 irhs1 = *defs.get (rhs1);
3329 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3330 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3331 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3333 rhs_code = def_rhs_code;
3334 rhs1 = def_rhs1;
3335 rhs2 = gimple_assign_rhs2 (def_stmt);
3336 trueval = irhs1;
3337 goto do_compare;
3339 else
3340 irhs2 = *defs.get (rhs2);
3341 goto and_ior_xor;
3343 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3344 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3345 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3347 irhs2 = *defs.get (rhs2);
3348 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3349 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3350 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3352 rhs_code = def_rhs_code;
3353 rhs1 = def_rhs1;
3354 rhs2 = gimple_assign_rhs2 (def_stmt);
3355 trueval = irhs2;
3356 goto do_compare;
3358 else
3359 irhs1 = *defs.get (rhs1);
3360 goto and_ior_xor;
3362 /* FALLTHRU */
3363 case BIT_IOR_EXPR:
3364 case BIT_XOR_EXPR:
3365 irhs1 = *defs.get (rhs1);
3366 irhs2 = *defs.get (rhs2);
3367 and_ior_xor:
3368 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3369 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3371 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3372 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3373 int out_prec = TYPE_PRECISION (out_type);
3374 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3375 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3376 stmt_info);
3377 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3378 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3379 stmt_info);
3380 else
3382 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3383 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3386 itype = TREE_TYPE (irhs1);
3387 pattern_stmt
3388 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3389 rhs_code, irhs1, irhs2);
3390 break;
3392 default:
3393 do_compare:
3394 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3395 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3396 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3397 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3398 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3400 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3401 itype
3402 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3404 else
3405 itype = TREE_TYPE (rhs1);
3406 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3407 if (trueval == NULL_TREE)
3408 trueval = build_int_cst (itype, 1);
3409 else
3410 gcc_checking_assert (useless_type_conversion_p (itype,
3411 TREE_TYPE (trueval)));
3412 pattern_stmt
3413 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3414 COND_EXPR, cond_expr, trueval,
3415 build_int_cst (itype, 0));
3416 break;
3419 gimple_set_location (pattern_stmt, loc);
3420 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3421 pattern def seq stmts instead of just letting auto-detection do
3422 its work? */
3423 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3424 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3425 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3426 append_pattern_def_seq (stmt_info, pattern_stmt);
3427 defs.put (var, gimple_assign_lhs (pattern_stmt));
3430 /* Comparison function to qsort a vector of gimple stmts after UID. */
3432 static int
3433 sort_after_uid (const void *p1, const void *p2)
3435 const gimple *stmt1 = *(const gimple * const *)p1;
3436 const gimple *stmt2 = *(const gimple * const *)p2;
3437 return gimple_uid (stmt1) - gimple_uid (stmt2);
3440 /* Create pattern stmts for all stmts participating in the bool pattern
3441 specified by BOOL_STMT_SET and its root STMT with the desired type
3442 OUT_TYPE. Return the def of the pattern root. */
3444 static tree
3445 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3446 tree out_type, gimple *stmt)
3448 /* Gather original stmts in the bool pattern in their order of appearance
3449 in the IL. */
3450 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3451 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3452 i != bool_stmt_set.end (); ++i)
3453 bool_stmts.quick_push (*i);
3454 bool_stmts.qsort (sort_after_uid);
3456 /* Now process them in that order, producing pattern stmts. */
3457 hash_map <tree, tree> defs;
3458 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3459 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3460 out_type, vinfo_for_stmt (stmt), defs);
3462 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3463 gimple *pattern_stmt
3464 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3465 return gimple_assign_lhs (pattern_stmt);
3468 /* Helper for search_type_for_mask. */
3470 static tree
3471 search_type_for_mask_1 (tree var, vec_info *vinfo,
3472 hash_map<gimple *, tree> &cache)
3474 gimple *def_stmt;
3475 enum vect_def_type dt;
3476 tree rhs1;
3477 enum tree_code rhs_code;
3478 tree res = NULL_TREE, res2;
3480 if (TREE_CODE (var) != SSA_NAME)
3481 return NULL_TREE;
3483 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3484 return NULL_TREE;
3486 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3487 return NULL_TREE;
3489 if (dt != vect_internal_def)
3490 return NULL_TREE;
3492 if (!is_gimple_assign (def_stmt))
3493 return NULL_TREE;
3495 tree *c = cache.get (def_stmt);
3496 if (c)
3497 return *c;
3499 rhs_code = gimple_assign_rhs_code (def_stmt);
3500 rhs1 = gimple_assign_rhs1 (def_stmt);
3502 switch (rhs_code)
3504 case SSA_NAME:
3505 case BIT_NOT_EXPR:
3506 CASE_CONVERT:
3507 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3508 break;
3510 case BIT_AND_EXPR:
3511 case BIT_IOR_EXPR:
3512 case BIT_XOR_EXPR:
3513 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3514 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3515 cache);
3516 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3517 res = res2;
3518 break;
3520 default:
3521 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3523 tree comp_vectype, mask_type;
3525 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3527 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3528 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3529 vinfo, cache);
3530 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3531 res = res2;
3532 break;
3535 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3536 if (comp_vectype == NULL_TREE)
3538 res = NULL_TREE;
3539 break;
3542 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3543 if (!mask_type
3544 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3546 res = NULL_TREE;
3547 break;
3550 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3551 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3553 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3554 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3556 else
3557 res = TREE_TYPE (rhs1);
3561 cache.put (def_stmt, res);
3562 return res;
3565 /* Return the proper type for converting bool VAR into
3566 an integer value or NULL_TREE if no such type exists.
3567 The type is chosen so that converted value has the
3568 same number of elements as VAR's vector type. */
3570 static tree
3571 search_type_for_mask (tree var, vec_info *vinfo)
3573 hash_map<gimple *, tree> cache;
3574 return search_type_for_mask_1 (var, vinfo, cache);
3577 /* Function vect_recog_bool_pattern
3579 Try to find pattern like following:
3581 bool a_b, b_b, c_b, d_b, e_b;
3582 TYPE f_T;
3583 loop:
3584 S1 a_b = x1 CMP1 y1;
3585 S2 b_b = x2 CMP2 y2;
3586 S3 c_b = a_b & b_b;
3587 S4 d_b = x3 CMP3 y3;
3588 S5 e_b = c_b | d_b;
3589 S6 f_T = (TYPE) e_b;
3591 where type 'TYPE' is an integral type. Or a similar pattern
3592 ending in
3594 S6 f_Y = e_b ? r_Y : s_Y;
3596 as results from if-conversion of a complex condition.
3598 Input:
3600 * LAST_STMT: A stmt at the end from which the pattern
3601 search begins, i.e. cast of a bool to
3602 an integer type.
3604 Output:
3606 * TYPE_IN: The type of the input arguments to the pattern.
3608 * TYPE_OUT: The type of the output of this pattern.
3610 * Return value: A new stmt that will be used to replace the pattern.
3612 Assuming the size of TYPE is the same as the size of all comparisons
3613 (otherwise some casts would be added where needed), for the above
3614 sequence we create the related pattern stmts:
3615 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3616 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3617 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3618 S5' e_T = c_T | d_T;
3619 S6' f_T = e_T;
3621 Instead of the above S3' we could emit:
3622 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3623 S3' c_T = a_T | b_T;
3624 but the above is more efficient. */
3626 static gimple *
3627 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3628 tree *type_out)
3630 gimple *last_stmt = stmts->pop ();
3631 enum tree_code rhs_code;
3632 tree var, lhs, rhs, vectype;
3633 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3634 stmt_vec_info new_stmt_info;
3635 vec_info *vinfo = stmt_vinfo->vinfo;
3636 gimple *pattern_stmt;
3638 if (!is_gimple_assign (last_stmt))
3639 return NULL;
3641 var = gimple_assign_rhs1 (last_stmt);
3642 lhs = gimple_assign_lhs (last_stmt);
3644 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3645 return NULL;
3647 hash_set<gimple *> bool_stmts;
3649 rhs_code = gimple_assign_rhs_code (last_stmt);
3650 if (CONVERT_EXPR_CODE_P (rhs_code))
3652 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3653 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3654 return NULL;
3655 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3656 if (vectype == NULL_TREE)
3657 return NULL;
3659 if (check_bool_pattern (var, vinfo, bool_stmts))
3661 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3662 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3663 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3664 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3665 else
3666 pattern_stmt
3667 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3669 else
3671 tree type = search_type_for_mask (var, vinfo);
3672 tree cst0, cst1, tmp;
3674 if (!type)
3675 return NULL;
3677 /* We may directly use the cond with a narrowed type to avoid
3678 multiple cond exprs with subsequent result packing, and
3679 perform a single cond with a packed mask instead. In case
3680 of widening it is better to make the cond first and then
3681 extract the results. */
3682 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3683 type = TREE_TYPE (lhs);
3685 cst0 = build_int_cst (type, 0);
3686 cst1 = build_int_cst (type, 1);
3687 tmp = vect_recog_temp_ssa_var (type, NULL);
3688 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3690 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3692 tree new_vectype = get_vectype_for_scalar_type (type);
3693 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3694 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3695 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3696 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3698 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3699 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3703 *type_out = vectype;
3704 *type_in = vectype;
3705 stmts->safe_push (last_stmt);
3706 if (dump_enabled_p ())
3707 dump_printf_loc (MSG_NOTE, vect_location,
3708 "vect_recog_bool_pattern: detected:\n");
3710 return pattern_stmt;
3712 else if (rhs_code == COND_EXPR
3713 && TREE_CODE (var) == SSA_NAME)
3715 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3716 if (vectype == NULL_TREE)
3717 return NULL;
3719 /* Build a scalar type for the boolean result that when
3720 vectorized matches the vector type of the result in
3721 size and number of elements. */
3722 unsigned prec
3723 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3724 TYPE_VECTOR_SUBPARTS (vectype));
3726 tree type
3727 = build_nonstandard_integer_type (prec,
3728 TYPE_UNSIGNED (TREE_TYPE (var)));
3729 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3730 return NULL;
3732 if (!check_bool_pattern (var, vinfo, bool_stmts))
3733 return NULL;
3735 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3737 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3738 pattern_stmt
3739 = gimple_build_assign (lhs, COND_EXPR,
3740 build2 (NE_EXPR, boolean_type_node,
3741 rhs, build_int_cst (type, 0)),
3742 gimple_assign_rhs2 (last_stmt),
3743 gimple_assign_rhs3 (last_stmt));
3744 *type_out = vectype;
3745 *type_in = vectype;
3746 stmts->safe_push (last_stmt);
3747 if (dump_enabled_p ())
3748 dump_printf_loc (MSG_NOTE, vect_location,
3749 "vect_recog_bool_pattern: detected:\n");
3751 return pattern_stmt;
3753 else if (rhs_code == SSA_NAME
3754 && STMT_VINFO_DATA_REF (stmt_vinfo))
3756 stmt_vec_info pattern_stmt_info;
3757 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3758 gcc_assert (vectype != NULL_TREE);
3759 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3760 return NULL;
3762 if (check_bool_pattern (var, vinfo, bool_stmts))
3763 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3764 else
3766 tree type = search_type_for_mask (var, vinfo);
3767 tree cst0, cst1, new_vectype;
3769 if (!type)
3770 return NULL;
3772 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3773 type = TREE_TYPE (vectype);
3775 cst0 = build_int_cst (type, 0);
3776 cst1 = build_int_cst (type, 1);
3777 new_vectype = get_vectype_for_scalar_type (type);
3779 rhs = vect_recog_temp_ssa_var (type, NULL);
3780 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3782 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3783 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3784 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3785 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3788 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3789 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3791 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3792 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3793 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3794 rhs = rhs2;
3796 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3797 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3798 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3799 STMT_VINFO_DATA_REF (pattern_stmt_info)
3800 = STMT_VINFO_DATA_REF (stmt_vinfo);
3801 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3802 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3803 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3804 *type_out = vectype;
3805 *type_in = vectype;
3806 stmts->safe_push (last_stmt);
3807 if (dump_enabled_p ())
3808 dump_printf_loc (MSG_NOTE, vect_location,
3809 "vect_recog_bool_pattern: detected:\n");
3810 return pattern_stmt;
3812 else
3813 return NULL;
3817 /* A helper for vect_recog_mask_conversion_pattern.  Build a
3818 conversion of MASK to a type suitable for masking VECTYPE.
3819 The built statement gets the required vectype and is appended
3820 to the pattern definition sequence of STMT_VINFO.
3822 Return the converted mask. */
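/* For example (a sketch with made-up names): if VECTYPE is a vector
   of doubles, MASKTYPE is the boolean vector type with one element
   per double, and the statement appended below has the form
     patt_m = (<boolean of the matching width>) MASK;
   callers then use patt_m in place of MASK.  The exact boolean
   element width is target-dependent.  */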
3824 static tree
3825 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3826 vec_info *vinfo)
3828 gimple *stmt;
3829 tree masktype, tmp;
3830 stmt_vec_info new_stmt_info;
3832 masktype = build_same_sized_truth_vector_type (vectype);
3833 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3834 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3835 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3836 set_vinfo_for_stmt (stmt, new_stmt_info);
3837 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3838 append_pattern_def_seq (stmt_vinfo, stmt);
3840 return tmp;
3844 /* Function vect_recog_mask_conversion_pattern
3846 Try to find statements which require boolean type
3847 conversion. Additional conversion statements are
3848 added to handle such cases. For example:
3850 bool m_1, m_2, m_3;
3851 int i_4, i_5;
3852 double d_6, d_7;
3853 char c_1, c_2, c_3;
3855 S1 m_1 = i_4 > i_5;
3856 S2 m_2 = d_6 < d_7;
3857 S3 m_3 = m_1 & m_2;
3858 S4 c_1 = m_3 ? c_2 : c_3;
3860 Will be transformed into:
3862 S1 m_1 = i_4 > i_5;
3863 S2 m_2 = d_6 < d_7;
3864 S3'' m_2' = (_Bool[bitsize=32])m_2
3865 S3' m_3' = m_1 & m_2';
3866 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3867 S4' c_1' = m_3'' ? c_2 : c_3; */
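/* A C loop that can lead to the situation above (illustrative only,
   with made-up names):

     for (i = 0; i < n; i++)
       c_dst[i] = (i_a[i] > i_b[i]) & (d_a[i] < d_b[i]) ? c_2[i] : c_3[i];

   The int comparison and the double comparison produce masks with
   different element sizes, so conversion stmts are inserted before
   the & and the ?: can use a single common mask type.  */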
3869 static gimple *
3870 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3871 tree *type_out)
3873 gimple *last_stmt = stmts->pop ();
3874 enum tree_code rhs_code;
3875 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3876 tree vectype1, vectype2;
3877 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3878 stmt_vec_info pattern_stmt_info;
3879 vec_info *vinfo = stmt_vinfo->vinfo;
3881 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
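/* E.g. (illustrative) for

     for (i = 0; i < n; i++)
       if (c[i])
         x[i] = y[i];

   with int c[] and double x[], y[], the mask computed from c has
   more elements per vector than the double accesses, so the mask
   argument of the IFN_MASK_LOAD/IFN_MASK_STORE calls must be
   converted before the calls can be vectorized.  */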
3882 if (is_gimple_call (last_stmt)
3883 && gimple_call_internal_p (last_stmt)
3884 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3885 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3887 gcall *pattern_stmt;
3888 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3890 if (load)
3892 lhs = gimple_call_lhs (last_stmt);
3893 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3895 else
3897 rhs2 = gimple_call_arg (last_stmt, 3);
3898 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3901 rhs1 = gimple_call_arg (last_stmt, 2);
3902 rhs1_type = search_type_for_mask (rhs1, vinfo);
3903 if (!rhs1_type)
3904 return NULL;
3905 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3907 if (!vectype1 || !vectype2
3908 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3909 TYPE_VECTOR_SUBPARTS (vectype2)))
3910 return NULL;
3912 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3914 if (load)
3916 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3917 pattern_stmt
3918 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3919 gimple_call_arg (last_stmt, 0),
3920 gimple_call_arg (last_stmt, 1),
3921 tmp);
3922 gimple_call_set_lhs (pattern_stmt, lhs);
3924 else
3925 pattern_stmt
3926 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3927 gimple_call_arg (last_stmt, 0),
3928 gimple_call_arg (last_stmt, 1),
3929 tmp,
3930 gimple_call_arg (last_stmt, 3));
3932 gimple_call_set_nothrow (pattern_stmt, true);
3934 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3935 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3936 STMT_VINFO_DATA_REF (pattern_stmt_info)
3937 = STMT_VINFO_DATA_REF (stmt_vinfo);
3938 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3939 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3940 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3942 *type_out = vectype1;
3943 *type_in = vectype1;
3944 stmts->safe_push (last_stmt);
3945 if (dump_enabled_p ())
3946 dump_printf_loc (MSG_NOTE, vect_location,
3947 "vect_recog_mask_conversion_pattern: detected:\n");
3949 return pattern_stmt;
3952 if (!is_gimple_assign (last_stmt))
3953 return NULL;
3955 gimple *pattern_stmt;
3956 lhs = gimple_assign_lhs (last_stmt);
3957 rhs1 = gimple_assign_rhs1 (last_stmt);
3958 rhs_code = gimple_assign_rhs_code (last_stmt);
3960 /* Check for cond expression requiring mask conversion. */
3961 if (rhs_code == COND_EXPR)
3963 /* vect_recog_mixed_size_cond_pattern could apply.
3964 If so, do nothing here. */
3965 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3966 return NULL;
3968 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3970 if (TREE_CODE (rhs1) == SSA_NAME)
3972 rhs1_type = search_type_for_mask (rhs1, vinfo);
3973 if (!rhs1_type)
3974 return NULL;
3976 else if (COMPARISON_CLASS_P (rhs1))
3978 /* Check whether we're comparing scalar booleans and (if so)
3979 whether a better mask type exists than the mask associated
3980 with boolean-sized elements. This avoids unnecessary packs
3981 and unpacks if the booleans are set from comparisons of
3982 wider types. E.g. in:
3984 int x1, x2, x3, x4, y1, y2;
3986 bool b1 = (x1 == x2);
3987 bool b2 = (x3 == x4);
3988 ... = b1 == b2 ? y1 : y2;
3990 it is better for b1 and b2 to use the mask type associated
3991 with int elements rather than bool (byte) elements. */
3992 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3993 if (!rhs1_type)
3994 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3996 else
3997 return NULL;
3999 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4001 if (!vectype1 || !vectype2)
4002 return NULL;
4004 /* Continue if a conversion is needed. Also continue if we have
4005 a comparison whose vector type would normally be different from
4006 VECTYPE2 when considered in isolation. In that case we'll
4007 replace the comparison with an SSA name (so that we can record
4008 its vector type) and behave as though the comparison was an SSA
4009 name from the outset. */
4010 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4011 TYPE_VECTOR_SUBPARTS (vectype2))
4012 && (TREE_CODE (rhs1) == SSA_NAME
4013 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4014 return NULL;
4016 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4017 in place; vectorizable_condition can handle it there. This avoids
4018 unnecessary promotion stmts and an increased vectorization factor. */
4019 if (COMPARISON_CLASS_P (rhs1)
4020 && INTEGRAL_TYPE_P (rhs1_type)
4021 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4022 TYPE_VECTOR_SUBPARTS (vectype2)))
4024 gimple *dummy;
4025 enum vect_def_type dt;
4026 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4027 &dummy, &dt)
4028 && dt == vect_external_def
4029 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4030 &dummy, &dt)
4031 && (dt == vect_external_def
4032 || dt == vect_constant_def))
4034 tree wide_scalar_type = build_nonstandard_integer_type
4035 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4036 TYPE_UNSIGNED (rhs1_type));
4037 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4038 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4039 return NULL;
4043 /* If rhs1 is a comparison we need to move it into a
4044 separate statement. */
4045 if (TREE_CODE (rhs1) != SSA_NAME)
4047 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4048 pattern_stmt = gimple_build_assign (tmp, rhs1);
4049 rhs1 = tmp;
4051 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4052 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4053 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4054 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4057 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4058 TYPE_VECTOR_SUBPARTS (vectype2)))
4059 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4060 else
4061 tmp = rhs1;
4063 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4064 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4065 gimple_assign_rhs2 (last_stmt),
4066 gimple_assign_rhs3 (last_stmt));
4068 *type_out = vectype1;
4069 *type_in = vectype1;
4070 stmts->safe_push (last_stmt);
4071 if (dump_enabled_p ())
4072 dump_printf_loc (MSG_NOTE, vect_location,
4073 "vect_recog_mask_conversion_pattern: detected:\n");
4075 return pattern_stmt;
4078 /* Now check for binary boolean operations requiring conversion for
4079 one of the operands. */
4080 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4081 return NULL;
4083 if (rhs_code != BIT_IOR_EXPR
4084 && rhs_code != BIT_XOR_EXPR
4085 && rhs_code != BIT_AND_EXPR
4086 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4087 return NULL;
4089 rhs2 = gimple_assign_rhs2 (last_stmt);
4091 rhs1_type = search_type_for_mask (rhs1, vinfo);
4092 rhs2_type = search_type_for_mask (rhs2, vinfo);
4094 if (!rhs1_type || !rhs2_type
4095 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4096 return NULL;
4098 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4100 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4101 if (!vectype1)
4102 return NULL;
4103 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4105 else
4107 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4108 if (!vectype1)
4109 return NULL;
4110 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4113 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4114 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4116 *type_out = vectype1;
4117 *type_in = vectype1;
4118 stmts->safe_push (last_stmt);
4119 if (dump_enabled_p ())
4120 dump_printf_loc (MSG_NOTE, vect_location,
4121 "vect_recog_mask_conversion_pattern: detected:\n");
4123 return pattern_stmt;
4126 /* STMT is a load or store. If the load or store is conditional, return
4127 the boolean condition under which it occurs, otherwise return null. */
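/* For example, given _3 = .MASK_LOAD (ptr_1, align, mask_2) this
   returns mask_2, whereas for a plain (unconditional) load or store
   assignment it returns NULL_TREE.  The names above are illustrative
   only.  */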
4129 static tree
4130 vect_get_load_store_mask (gimple *stmt)
4132 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4134 gcc_assert (gimple_assign_single_p (def_assign));
4135 return NULL_TREE;
4138 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4140 internal_fn ifn = gimple_call_internal_fn (def_call);
4141 int mask_index = internal_fn_mask_index (ifn);
4142 return gimple_call_arg (def_call, mask_index);
4145 gcc_unreachable ();
4148 /* Return the scalar offset type that an internal gather/scatter function
4149 should use. GS_INFO describes the gather/scatter operation. */
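/* For example (illustrative), a gather of 64-bit elements indexed by
   a 32-bit signed offset gets a signed 64-bit offset type, so that
   after conversion the offset vector has the same number of elements
   as the data vector; an offset that is already as wide as the
   elements is returned unchanged.  */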
4151 static tree
4152 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4154 tree offset_type = TREE_TYPE (gs_info->offset);
4155 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4157 /* Enforced by vect_check_gather_scatter. */
4158 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4159 gcc_assert (element_bits >= offset_bits);
4161 /* If the offset is narrower than the elements, extend it according
4162 to its sign. */
4163 if (element_bits > offset_bits)
4164 return build_nonstandard_integer_type (element_bits,
4165 TYPE_UNSIGNED (offset_type));
4167 return offset_type;
4170 /* Return MASK if MASK is suitable for masking an operation on vectors
4171 of type VECTYPE, otherwise convert it into such a form and return
4172 the result. Associate any conversion statements with STMT_INFO's
4173 pattern. */
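/* For example (a sketch), if MASK was computed from a comparison of
   ints but VECTYPE is a vector of doubles, the two mask types
   typically have different numbers of elements per vector, in which
   case a conversion stmt is added via build_mask_conversion; if the
   element counts already match, MASK is returned unchanged.  */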
4175 static tree
4176 vect_convert_mask_for_vectype (tree mask, tree vectype,
4177 stmt_vec_info stmt_info, vec_info *vinfo)
4179 tree mask_type = search_type_for_mask (mask, vinfo);
4180 if (mask_type)
4182 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4183 if (mask_vectype
4184 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4185 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4186 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4188 return mask;
4191 /* Return the equivalent of:
4193 fold_convert (TYPE, VALUE)
4195 with the expectation that the operation will be vectorized.
4196 If new statements are needed, add them as pattern statements
4197 to STMT_INFO. */
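/* For example (with illustrative names), converting a 32-bit offset
   off_1 to a 64-bit TYPE adds the pattern stmt
     patt_2 = (long long int) off_1;
   and returns patt_2, while a VALUE whose type is already compatible
   with TYPE is returned unchanged.  */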
4199 static tree
4200 vect_add_conversion_to_pattern (tree type, tree value,
4201 stmt_vec_info stmt_info,
4202 vec_info *vinfo)
4204 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4205 return value;
4207 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4208 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4209 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4210 set_vinfo_for_stmt (conversion, new_stmt_info);
4211 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4212 append_pattern_def_seq (stmt_info, conversion);
4213 return new_value;
4216 /* Try to convert STMT into a call to a gather load or scatter store
4217 internal function. Return the final statement on success and set
4218 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4220 This function only handles gathers and scatters that were recognized
4221 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
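/* For example (an illustrative sketch), on a target that provides
   IFN_GATHER_LOAD -- such as AArch64 SVE -- the indexed load in

     for (i = 0; i < n; i++)
       dest[i] += src[index[i]];

   can be replaced by a pattern stmt of the form
     patt_3 = .GATHER_LOAD (base, offset, scale);
   or a .MASK_GATHER_LOAD call when the original access is
   conditional.  */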
4223 static gimple *
4224 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4225 tree *type_in, tree *type_out)
4227 /* Currently we only support this for loop vectorization. */
4228 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4229 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4230 if (!loop_vinfo)
4231 return NULL;
4233 /* Make sure that we're looking at a gather load or scatter store. */
4234 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4235 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4236 return NULL;
4238 /* Reject stores for now. */
4239 if (!DR_IS_READ (dr))
4240 return NULL;
4242 /* Get the boolean that controls whether the load or store happens.
4243 This is null if the operation is unconditional. */
4244 tree mask = vect_get_load_store_mask (stmt);
4246 /* Make sure that the target supports an appropriate internal
4247 function for the gather/scatter operation. */
4248 gather_scatter_info gs_info;
4249 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4250 || gs_info.decl)
4251 return NULL;
4253 /* Convert the mask to the right form. */
4254 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4255 if (mask)
4256 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4257 loop_vinfo);
4259 /* Get the invariant base and non-invariant offset, converting the
4260 latter to the same width as the vector elements. */
4261 tree base = gs_info.base;
4262 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4263 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4264 last_stmt_info, loop_vinfo);
4266 /* Build the new pattern statement. */
4267 tree scale = size_int (gs_info.scale);
4268 gcall *pattern_stmt;
4269 if (DR_IS_READ (dr))
4271 if (mask != NULL)
4272 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4273 offset, scale, mask);
4274 else
4275 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4276 offset, scale);
4277 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4278 gimple_call_set_lhs (pattern_stmt, load_lhs);
4280 else
4281 /* Not yet supported. */
4282 gcc_unreachable ();
4283 gimple_call_set_nothrow (pattern_stmt, true);
4285 /* Copy across relevant vectorization info and associate DR with the
4286 new pattern statement instead of the original statement. */
4287 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4288 loop_vinfo);
4289 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4290 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4291 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4292 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4293 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4294 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4295 DR_STMT (dr) = pattern_stmt;
4297 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4298 *type_out = vectype;
4299 *type_in = vectype;
4301 if (dump_enabled_p ())
4302 dump_printf_loc (MSG_NOTE, vect_location,
4303 "gather/scatter pattern detected:\n");
4305 return pattern_stmt;
4308 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4310 static gimple *
4311 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4312 tree *type_out)
4314 gimple *last_stmt = stmts->pop ();
4315 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4316 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4317 last_stmt_info,
4318 type_in, type_out);
4319 if (pattern_stmt)
4320 stmts->safe_push (last_stmt);
4321 return pattern_stmt;
4324 /* Mark statements that are involved in a pattern. */
4326 static inline void
4327 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4328 tree pattern_vectype)
4330 stmt_vec_info pattern_stmt_info, def_stmt_info;
4331 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4332 vec_info *vinfo = orig_stmt_info->vinfo;
4333 gimple *def_stmt;
4335 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4336 if (pattern_stmt_info == NULL)
4338 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4339 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4341 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4343 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4344 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4345 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4346 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4347 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4348 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4349 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4350 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4351 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4353 gimple_stmt_iterator si;
4354 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4355 !gsi_end_p (si); gsi_next (&si))
4357 def_stmt = gsi_stmt (si);
4358 def_stmt_info = vinfo_for_stmt (def_stmt);
4359 if (def_stmt_info == NULL)
4361 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4362 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4364 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4365 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4366 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4367 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4368 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4373 /* Function vect_pattern_recog_1
4375 Input:
4376 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4377 computation pattern.
4378 STMT: A stmt from which the pattern search should start.
4380 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4381 expression that computes the same functionality and can be used to
4382 replace the sequence of stmts that are involved in the pattern.
4384 Output:
4385 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4386 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4387 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4388 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4389 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
4390 fits the available target pattern.
4392 This function also does some bookkeeping, as explained in the documentation
4393 for vect_pattern_recog. */
4395 static bool
4396 vect_pattern_recog_1 (vect_recog_func *recog_func,
4397 gimple_stmt_iterator si,
4398 vec<gimple *> *stmts_to_replace)
4400 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4401 stmt_vec_info stmt_info;
4402 loop_vec_info loop_vinfo;
4403 tree pattern_vectype;
4404 tree type_in, type_out;
4405 enum tree_code code;
4406 int i;
4407 gimple *next;
4409 stmts_to_replace->truncate (0);
4410 stmts_to_replace->quick_push (stmt);
4411 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4412 if (!pattern_stmt)
4413 return false;
4415 stmt = stmts_to_replace->last ();
4416 stmt_info = vinfo_for_stmt (stmt);
4417 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4419 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4420 || VECTOR_TYPE_P (type_in))
4422 /* No need to check target support (already checked by the pattern
4423 recognition function). */
4424 pattern_vectype = type_out ? type_out : type_in;
4426 else
4428 machine_mode vec_mode;
4429 enum insn_code icode;
4430 optab optab;
4432 /* Check target support */
4433 type_in = get_vectype_for_scalar_type (type_in);
4434 if (!type_in)
4435 return false;
4436 if (type_out)
4437 type_out = get_vectype_for_scalar_type (type_out);
4438 else
4439 type_out = type_in;
4440 if (!type_out)
4441 return false;
4442 pattern_vectype = type_out;
4444 if (is_gimple_assign (pattern_stmt))
4445 code = gimple_assign_rhs_code (pattern_stmt);
4446 else
4448 gcc_assert (is_gimple_call (pattern_stmt));
4449 code = CALL_EXPR;
4452 optab = optab_for_tree_code (code, type_in, optab_default);
4453 vec_mode = TYPE_MODE (type_in);
4454 if (!optab
4455 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4456 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4457 return false;
4460 /* Found a vectorizable pattern. */
4461 if (dump_enabled_p ())
4463 dump_printf_loc (MSG_NOTE, vect_location,
4464 "%s pattern recognized: ", recog_func->name);
4465 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4468 /* Mark the stmts that are involved in the pattern. */
4469 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4471 /* Patterns cannot be vectorized using SLP, because they change the order of
4472 computation. */
4473 if (loop_vinfo)
4474 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4475 if (next == stmt)
4476 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
4478 /* It is possible that additional pattern stmts are created and inserted in
4479 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4480 relevant statements. */
4481 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4482 && (unsigned) i < (stmts_to_replace->length () - 1);
4483 i++)
4485 stmt_info = vinfo_for_stmt (stmt);
4486 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4487 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_NOTE, vect_location,
4490 "additional pattern stmt: ");
4491 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4494 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4497 return true;
4501 /* Function vect_pattern_recog
4503 Input:
4504 VINFO - a vec_info describing the loop or basic-block region in which
4505 we want to look for computation idioms.
4507 Output - for each computation idiom that is detected we create a new stmt
4508 that provides the same functionality and that can be vectorized. We
4509 also record some information in the stmt_vec_info of the relevant
4510 stmts, as explained below:
4512 At the entry to this function we have the following stmts, with the
4513 following initial value in the STMT_VINFO fields:
4515 stmt in_pattern_p related_stmt vec_stmt
4516 S1: a_i = .... - - -
4517 S2: a_2 = ..use(a_i).. - - -
4518 S3: a_1 = ..use(a_2).. - - -
4519 S4: a_0 = ..use(a_1).. - - -
4520 S5: ... = ..use(a_0).. - - -
4522 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4523 represented by a single stmt. We then:
4524 - create a new stmt S6 equivalent to the pattern (the stmt is not
4525 inserted into the code)
4526 - fill in the STMT_VINFO fields as follows:
4528 in_pattern_p related_stmt vec_stmt
4529 S1: a_i = .... - - -
4530 S2: a_2 = ..use(a_i).. - - -
4531 S3: a_1 = ..use(a_2).. - - -
4532 S4: a_0 = ..use(a_1).. true S6 -
4533 '---> S6: a_new = .... - S4 -
4534 S5: ... = ..use(a_0).. - - -
4536 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4537 to each other through the RELATED_STMT field).
4539 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4540 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4541 remain irrelevant unless used by stmts other than S4.
4543 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4544 (because they are marked as irrelevant). It will vectorize S6, and record
4545 a pointer to the new vector stmt VS6 from S6 (as usual).
4546 S4 will be skipped, and S5 will be vectorized as usual:
4548 in_pattern_p related_stmt vec_stmt
4549 S1: a_i = .... - - -
4550 S2: a_2 = ..use(a_i).. - - -
4551 S3: a_1 = ..use(a_2).. - - -
4552 > VS6: va_new = .... - - -
4553 S4: a_0 = ..use(a_1).. true S6 VS6
4554 '---> S6: a_new = .... - S4 VS6
4555 > VS5: ... = ..vuse(va_new).. - - -
4556 S5: ... = ..use(a_0).. - - -
4558 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4559 elsewhere), and we'll end up with:
4561 VS6: va_new = ....
4562 VS5: ... = ..vuse(va_new)..
4564 In case of more than one pattern statement, e.g., widen-mult with
4565 intermediate type:
4567 S1 a_t = ;
4568 S2 a_T = (TYPE) a_t;
4569 '--> S3: a_it = (interm_type) a_t;
4570 S4 prod_T = a_T * CONST;
4571 '--> S5: prod_T' = a_it w* CONST;
4573 there may be other users of a_T outside the pattern. In that case S2 will
4574 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4575 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4576 be recorded in S3. */
4578 void
4579 vect_pattern_recog (vec_info *vinfo)
4581 struct loop *loop;
4582 basic_block *bbs;
4583 unsigned int nbbs;
4584 gimple_stmt_iterator si;
4585 unsigned int i, j;
4586 auto_vec<gimple *, 1> stmts_to_replace;
4587 gimple *stmt;
4589 if (dump_enabled_p ())
4590 dump_printf_loc (MSG_NOTE, vect_location,
4591 "=== vect_pattern_recog ===\n");
4593 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4595 loop = LOOP_VINFO_LOOP (loop_vinfo);
4596 bbs = LOOP_VINFO_BBS (loop_vinfo);
4597 nbbs = loop->num_nodes;
4599 /* Scan through the loop stmts, applying the pattern recognition
4600 functions starting at each stmt visited: */
4601 for (i = 0; i < nbbs; i++)
4603 basic_block bb = bbs[i];
4604 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4606 /* Scan over all generic vect_recog_xxx_pattern functions. */
4607 for (j = 0; j < NUM_PATTERNS; j++)
4608 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4609 &stmts_to_replace))
4610 break;
4614 else
4616 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4617 for (si = bb_vinfo->region_begin;
4618 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4620 if ((stmt = gsi_stmt (si))
4621 && vinfo_for_stmt (stmt)
4622 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4623 continue;
4625 /* Scan over all generic vect_recog_xxx_pattern functions. */
4626 for (j = 0; j < NUM_PATTERNS; j++)
4627 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4628 &stmts_to_replace))
4629 break;