/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *,
						   tree *);
static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
						  tree *);
struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt
   is taken which means usually the more complex one needs to precede
   the less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_over_widening_pattern, "over_widening" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	  && STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION);
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
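/* Illustrative example (not part of the original sources): with
   type = short and TYPE1 = TYPE2 = int, a source loop such as

     short x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i] * y[i];	// S3-S7: widen, multiply, accumulate

   matches this pattern, and on targets that provide a dot-product
   instruction (e.g. PMADDWD on x86) the loop body can be replaced by a
   single DOT_PROD_EXPR per vector.  */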
static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (!vect_reassociating_reduction_p (stmt_vinfo)
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
	return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
	{
	  stmt = def_stmt;
	  oprnd0 = gimple_assign_rhs1 (stmt);
	}
      else
	half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Cannot continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
	return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
	return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
	return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
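/* Illustrative example (not part of the original sources): with
   type = unsigned char and TYPE1 = TYPE2 = int, the kernel of a typical
   video motion-estimation loop

     unsigned char x[16], y[16];
     int sum = 0;
     for (int i = 0; i < 16; i++)
       sum += abs (x[i] - y[i]);	// S3-S8 after promotion to int

   matches this pattern; targets with a SAD instruction (e.g. PSADBW on
   x86) can then compute the whole reduction with SAD_EXPR.  */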
static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  [DAD = (TYPE2) DAD;  #optional]
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (!vect_reassociating_reduction_p (stmt_vinfo)
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
	return NULL;

      /* The type conversion could be promotion, demotion,
	 or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
			     &half_type, &def_stmt, &promotion))
	plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
	half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Cannot continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Cannot continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
			  &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
			  &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
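/* Illustrative example (not part of the original sources): for

     char a_t;
     int  a_T = (int) a_t;
     int  res = a_T << 10;

   HALF_TYPE (char) cannot hold the shift amount 10, but TYPE (int) is
   4 times bigger than HALF_TYPE, so the promotion is rewritten through
   the intermediate type short:

     short a_it = (short) a_t;	// the conversion returned in *WSTMT
     res = a_it w<< 10;		// widen-shift short -> int

   since 10 <= TYPE_PRECISION (short).  */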
static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
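/* Illustrative example (not part of the original sources): with
   type1 = type2 = short and TYPE = int,

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];	// S3-S5: both operands widened, then multiplied

   matches the first form above and is replaced by a single
   WIDEN_MULT <a_t, b_t> per element, avoiding the explicit promotions.  */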
static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
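/* Illustrative example (not part of the original sources):

     double x[N], y[N];
     for (int i = 0; i < N; i++)
       y[i] = pow (x[i], 2.0);	// rewritten as x[i] * x[i]

   and likewise pow (x[i], 0.5) is rewritten as sqrt (x[i]) when the
   target supports a vector IFN_SQRT.  */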
static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && !gimple_call_internal_p (last_stmt))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      new_pattern_def_seq (stmt_vinfo, g);
	      *type_in = TREE_TYPE (base);
	      *type_out = NULL_TREE;
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
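/* Illustrative example (not part of the original sources): with
   type = unsigned char and TYPE = int,

     unsigned char x[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];		// S1-S3: load, widen, accumulate

   matches this pattern and the loop body is replaced by
   WIDEN_SUM <x_t, sum_0>.  */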
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (!vect_reassociating_reduction_p (stmt_vinfo)
      && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be created
	 in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
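/* Illustrative example (not part of the original sources): because C
   promotes the shift operands to int,

     unsigned char in[N], out[N];
     for (int i = 0; i < N; i++)
       out[i] = (in[i] << 1) >> 2;	// S2-S5: promoted to int, demoted back

   is over-widened; S3 and S4 can instead be performed in a narrower type
   (here unsigned short, or even unsigned char), which multiplies the
   number of elements processed per vector.  */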
static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced
    with S4 in STMTS.  When an intermediate type is used and a pattern
    statement is created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
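/* Illustrative example (not part of the original sources): with
   type = short and TYPE = int,

     short a[N];
     int res[N];
     for (int i = 0; i < N; i++)
       res[i] = a[i] << 4;	// S2-S3: operand widened to int, then shifted

   matches this pattern and is replaced by WIDEN_LSHIFT_EXPR <a_t, 4>.  */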
static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt
    = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
1830 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1832 type a_t, b_t, c_t;
1834 S0 a_t = b_t r<< c_t;
1836 Input/Output:
1838 * STMTS: Contains a stmt from which the pattern search begins,
1839 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1840 with a sequence:
1842 S1 d_t = -c_t;
1843 S2 e_t = d_t & (B - 1);
1844 S3 f_t = b_t << c_t;
1845 S4 g_t = b_t >> e_t;
1846 S0 a_t = f_t | g_t;
1848 where B is the element bitsize of type.
1850 Output:
1852 * TYPE_IN: The type of the input arguments to the pattern.
1854 * TYPE_OUT: The type of the output of this pattern.
1856 * Return value: A new stmt that will be used to replace the rotate
1857 S0 stmt. */
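/* Illustrative sketch (not part of the original sources): a scalar C
   rendering of the S1-S4,S0 sequence above, assuming 32-bit elements
   (B == 32) and 0 <= c < 32:

     unsigned int
     rotl32 (unsigned int b, unsigned int c)
     {
       unsigned int d = -c;       // S1
       unsigned int e = d & 31;   // S2: B - 1
       unsigned int f = b << c;   // S3
       unsigned int g = b >> e;   // S4
       return f | g;              // S0
     }

   For c == 0 both f and g equal b, so the result is still b.  */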
1859 static gimple *
1860 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1862 gimple *last_stmt = stmts->pop ();
1863 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1864 gimple *pattern_stmt, *def_stmt;
1865 enum tree_code rhs_code;
1866 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1867 vec_info *vinfo = stmt_vinfo->vinfo;
1868 enum vect_def_type dt;
1869 optab optab1, optab2;
1870 edge ext_def = NULL;
1872 if (!is_gimple_assign (last_stmt))
1873 return NULL;
1875 rhs_code = gimple_assign_rhs_code (last_stmt);
1876 switch (rhs_code)
1878 case LROTATE_EXPR:
1879 case RROTATE_EXPR:
1880 break;
1881 default:
1882 return NULL;
1885 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1886 return NULL;
1888 lhs = gimple_assign_lhs (last_stmt);
1889 oprnd0 = gimple_assign_rhs1 (last_stmt);
1890 type = TREE_TYPE (oprnd0);
1891 oprnd1 = gimple_assign_rhs2 (last_stmt);
1892 if (TREE_CODE (oprnd0) != SSA_NAME
1893 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1894 || !INTEGRAL_TYPE_P (type)
1895 || !TYPE_UNSIGNED (type))
1896 return NULL;
1898 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1899 return NULL;
1901 if (dt != vect_internal_def
1902 && dt != vect_constant_def
1903 && dt != vect_external_def)
1904 return NULL;
1906 vectype = get_vectype_for_scalar_type (type);
1907 if (vectype == NULL_TREE)
1908 return NULL;
1910 /* If vector/vector or vector/scalar rotate is supported by the target,
1911 don't do anything here. */
1912 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1913 if (optab1
1914 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1915 return NULL;
1917 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1919 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1920 if (optab2
1921 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1922 return NULL;
1925 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1926 don't do anything here either. */
1927 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1928 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1929 if (!optab1
1930 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1931 || !optab2
1932 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1934 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1935 return NULL;
1936 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1937 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1938 if (!optab1
1939 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1940 || !optab2
1941 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1942 return NULL;
1945 *type_in = vectype;
1946 *type_out = vectype;
1947 if (*type_in == NULL_TREE)
1948 return NULL;
1950 if (dt == vect_external_def
1951 && TREE_CODE (oprnd1) == SSA_NAME
1952 && is_a <loop_vec_info> (vinfo))
1954 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1955 ext_def = loop_preheader_edge (loop);
1956 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1958 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1959 if (bb == NULL
1960 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1961 ext_def = NULL;
1965 def = NULL_TREE;
1966 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1967 if (TREE_CODE (oprnd1) == INTEGER_CST
1968 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1969 def = oprnd1;
1970 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1972 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1973 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1974 && TYPE_PRECISION (TREE_TYPE (rhs1))
1975 == TYPE_PRECISION (type))
1976 def = rhs1;
1979 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1980 if (def == NULL_TREE)
1982 def = vect_recog_temp_ssa_var (type, NULL);
1983 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1984 if (ext_def)
1986 basic_block new_bb
1987 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1988 gcc_assert (!new_bb);
1990 else
1991 append_pattern_def_seq (stmt_vinfo, def_stmt);
1993 stype = TREE_TYPE (def);
1994 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1996 if (TREE_CODE (def) == INTEGER_CST)
1998 if (!tree_fits_uhwi_p (def)
1999 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2000 || integer_zerop (def))
2001 return NULL;
2002 def2 = build_int_cst (stype,
2003 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2005 else
2007 tree vecstype = get_vectype_for_scalar_type (stype);
2008 stmt_vec_info def_stmt_vinfo;
2010 if (vecstype == NULL_TREE)
2011 return NULL;
2012 def2 = vect_recog_temp_ssa_var (stype, NULL);
2013 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2014 if (ext_def)
2016 basic_block new_bb
2017 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2018 gcc_assert (!new_bb);
2020 else
2022 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2023 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2024 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2025 append_pattern_def_seq (stmt_vinfo, def_stmt);
2028 def2 = vect_recog_temp_ssa_var (stype, NULL);
2029 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2030 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2031 gimple_assign_lhs (def_stmt), mask);
2032 if (ext_def)
2034 basic_block new_bb
2035 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2036 gcc_assert (!new_bb);
2038 else
2040 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2041 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2042 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2043 append_pattern_def_seq (stmt_vinfo, def_stmt);
2047 var1 = vect_recog_temp_ssa_var (type, NULL);
2048 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2049 ? LSHIFT_EXPR : RSHIFT_EXPR,
2050 oprnd0, def);
2051 append_pattern_def_seq (stmt_vinfo, def_stmt);
2053 var2 = vect_recog_temp_ssa_var (type, NULL);
2054 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2055 ? RSHIFT_EXPR : LSHIFT_EXPR,
2056 oprnd0, def2);
2057 append_pattern_def_seq (stmt_vinfo, def_stmt);
2059 /* Pattern detected. */
2060 if (dump_enabled_p ())
2061 dump_printf_loc (MSG_NOTE, vect_location,
2062 "vect_recog_rotate_pattern: detected:\n");
2064 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2065 var = vect_recog_temp_ssa_var (type, NULL);
2066 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2068 if (dump_enabled_p ())
2069 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2071 stmts->safe_push (last_stmt);
2072 return pattern_stmt;
2075 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2076 vectorized:
2078 type a_t;
2079 TYPE b_T, res_T;
2081 S1 a_t = ;
2082 S2 b_T = ;
2083 S3 res_T = b_T op a_t;
2085 where type 'TYPE' is a type with different size than 'type',
2086 and op is <<, >> or rotate.
2088 Also detect cases:
2090 type a_t;
2091 TYPE b_T, c_T, res_T;
2093 S0 c_T = ;
2094 S1 a_t = (type) c_T;
2095 S2 b_T = ;
2096 S3 res_T = b_T op a_t;
2098 Input/Output:
2100 * STMTS: Contains a stmt from which the pattern search begins,
2101 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2102 with a shift/rotate which has the same type on both operands, in the
2103 second case just b_T op c_T, in the first case with added cast
2104 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2106 Output:
2108 * TYPE_IN: The type of the input arguments to the pattern.
2110 * TYPE_OUT: The type of the output of this pattern.
2112 * Return value: A new stmt that will be used to replace the shift/rotate
2113 S3 stmt. */
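/* Illustrative sketch (hypothetical input, not from the sources): the
   pattern fires e.g. for

     int amnt[N];
     long long val[N], res[N];
     for (int i = 0; i < n; i++)
       res[i] = val[i] << amnt[i];

   where the shift amount has a different (here narrower) type than the
   shifted value; the cast emitted below gives both operands the same
   type so a vector-by-vector shift can be used.  */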
2115 static gimple *
2116 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2117 tree *type_in, tree *type_out)
2119 gimple *last_stmt = stmts->pop ();
2120 tree oprnd0, oprnd1, lhs, var;
2121 gimple *pattern_stmt, *def_stmt;
2122 enum tree_code rhs_code;
2123 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2124 vec_info *vinfo = stmt_vinfo->vinfo;
2125 enum vect_def_type dt;
2127 if (!is_gimple_assign (last_stmt))
2128 return NULL;
2130 rhs_code = gimple_assign_rhs_code (last_stmt);
2131 switch (rhs_code)
2133 case LSHIFT_EXPR:
2134 case RSHIFT_EXPR:
2135 case LROTATE_EXPR:
2136 case RROTATE_EXPR:
2137 break;
2138 default:
2139 return NULL;
2142 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2143 return NULL;
2145 lhs = gimple_assign_lhs (last_stmt);
2146 oprnd0 = gimple_assign_rhs1 (last_stmt);
2147 oprnd1 = gimple_assign_rhs2 (last_stmt);
2148 if (TREE_CODE (oprnd0) != SSA_NAME
2149 || TREE_CODE (oprnd1) != SSA_NAME
2150 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2151 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2152 || TYPE_PRECISION (TREE_TYPE (lhs))
2153 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2154 return NULL;
2156 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2157 return NULL;
2159 if (dt != vect_internal_def)
2160 return NULL;
2162 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2163 *type_out = *type_in;
2164 if (*type_in == NULL_TREE)
2165 return NULL;
2167 tree def = NULL_TREE;
2168 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2169 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2171 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2172 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2173 && TYPE_PRECISION (TREE_TYPE (rhs1))
2174 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2176 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2177 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2178 def = rhs1;
2179 else
2181 tree mask
2182 = build_low_bits_mask (TREE_TYPE (rhs1),
2183 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2184 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2185 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2186 new_pattern_def_seq (stmt_vinfo, def_stmt);
2191 if (def == NULL_TREE)
2193 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2194 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2195 new_pattern_def_seq (stmt_vinfo, def_stmt);
2198 /* Pattern detected. */
2199 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_NOTE, vect_location,
2201 "vect_recog_vector_vector_shift_pattern: detected:\n");
2203 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2204 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2205 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2207 if (dump_enabled_p ())
2208 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2210 stmts->safe_push (last_stmt);
2211 return pattern_stmt;
2214 /* Return true iff the target has a vector optab implementing the operation
2215 CODE on type VECTYPE. */
2217 static bool
2218 target_has_vecop_for_code (tree_code code, tree vectype)
2220 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2221 return voptab
2222 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2225 /* Verify that the target has optabs of VECTYPE to perform all the steps
2226 needed by the multiplication-by-immediate synthesis algorithm described by
2227 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2228 present. Return true iff the target supports all the steps. */
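/* For instance (an assumed decomposition, shown for illustration only),
   a multiplication by 10 can be synthesized as ((x << 2) + x) << 1,
   i.e. an alg_add_t_m2 step followed by an alg_shift step; the loop
   below merely checks that every kind of step used by ALG has a
   corresponding vector optab.  */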
2230 static bool
2231 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2232 tree vectype, bool synth_shift_p)
2234 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2235 return false;
2237 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2238 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2240 if (var == negate_variant
2241 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2242 return false;
2244 /* If we must synthesize shifts with additions, make sure that vector
2245 addition is available. */
2246 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2247 return false;
2249 for (int i = 1; i < alg->ops; i++)
2251 switch (alg->op[i])
2253 case alg_shift:
2254 break;
2255 case alg_add_t_m2:
2256 case alg_add_t2_m:
2257 case alg_add_factor:
2258 if (!supports_vplus)
2259 return false;
2260 break;
2261 case alg_sub_t_m2:
2262 case alg_sub_t2_m:
2263 case alg_sub_factor:
2264 if (!supports_vminus)
2265 return false;
2266 break;
2267 case alg_unknown:
2268 case alg_m:
2269 case alg_zero:
2270 case alg_impossible:
2271 return false;
2272 default:
2273 gcc_unreachable ();
2277 return true;
2280 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2281 putting the final result in DEST. Append all statements but the last into
2282 VINFO. Return the last statement. */
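/* For example, a shift by 3 is synthesized as three self-additions:

     t1 = op + op;     // op << 1
     t2 = t1 + t1;     // op << 2
     dest = t2 + t2;   // op << 3

   where the first two stmts are appended to VINFO and the last one is
   returned.  */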
2284 static gimple *
2285 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2286 stmt_vec_info vinfo)
2288 HOST_WIDE_INT i;
2289 tree itype = TREE_TYPE (op);
2290 tree prev_res = op;
2291 gcc_assert (amnt >= 0);
2292 for (i = 0; i < amnt; i++)
2294 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2295 : dest;
2296 gimple *stmt
2297 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2298 prev_res = tmp_var;
2299 if (i < amnt - 1)
2300 append_pattern_def_seq (vinfo, stmt);
2301 else
2302 return stmt;
2304 gcc_unreachable ();
2305 return NULL;
2308 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2309 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2310 the process if necessary. Append the resulting assignment statements
2311 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2312 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2313 left shifts using additions. */
2315 static tree
2316 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2317 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2319 if (integer_zerop (op2)
2320 && (code == LSHIFT_EXPR
2321 || code == PLUS_EXPR))
2323 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2324 return op1;
2327 gimple *stmt;
2328 tree itype = TREE_TYPE (op1);
2329 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2331 if (code == LSHIFT_EXPR
2332 && synth_shift_p)
2334 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2335 stmt_vinfo);
2336 append_pattern_def_seq (stmt_vinfo, stmt);
2337 return tmp_var;
2340 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2341 append_pattern_def_seq (stmt_vinfo, stmt);
2342 return tmp_var;
2345 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2346 and simple arithmetic operations to be vectorized. Record the statements
2347 produced in STMT_VINFO and return the last statement in the sequence or
2348 NULL if it's not possible to synthesize such a multiplication.
2349 This function mirrors the behavior of expand_mult_const in expmed.c but
2350 works on tree-ssa form. */
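/* Typical decompositions (shown for illustration; the exact choice is
   made by choose_mult_variant):

     op * 5  ->  t1 = op << 2;  res = t1 + op;
     op * 7  ->  t1 = op << 3;  res = t1 - op;

   with a trailing NEGATE_EXPR or PLUS_EXPR added when the negate_variant
   or add_variant forms are selected.  */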
2352 static gimple *
2353 vect_synth_mult_by_constant (tree op, tree val,
2354 stmt_vec_info stmt_vinfo)
2356 tree itype = TREE_TYPE (op);
2357 machine_mode mode = TYPE_MODE (itype);
2358 struct algorithm alg;
2359 mult_variant variant;
2360 if (!tree_fits_shwi_p (val))
2361 return NULL;
2363 /* Multiplication synthesis by shifts, adds and subs can introduce
2364 signed overflow where the original operation didn't. Perform the
2365 operations on an unsigned type and cast back to avoid this.
2366 In the future we may want to relax this for synthesis algorithms
2367 that we can prove do not cause unexpected overflow. */
2368 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2370 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2372 /* Targets that don't support vector shifts but support vector additions
2373 can synthesize shifts that way. */
2374 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2376 HOST_WIDE_INT hwval = tree_to_shwi (val);
2377 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2378 The vectorizer's benefit analysis will decide whether it's beneficial
2379 to do this. */
2380 bool possible = choose_mult_variant (mode, hwval, &alg,
2381 &variant, MAX_COST);
2382 if (!possible)
2383 return NULL;
2385 tree vectype = get_vectype_for_scalar_type (multtype);
2387 if (!vectype
2388 || !target_supports_mult_synth_alg (&alg, variant,
2389 vectype, synth_shift_p))
2390 return NULL;
2392 tree accumulator;
2394 /* Clear out the sequence of statements so we can populate it below. */
2395 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2396 gimple *stmt = NULL;
2398 if (cast_to_unsigned_p)
2400 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2401 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2402 append_pattern_def_seq (stmt_vinfo, stmt);
2403 op = tmp_op;
2406 if (alg.op[0] == alg_zero)
2407 accumulator = build_int_cst (multtype, 0);
2408 else
2409 accumulator = op;
2411 bool needs_fixup = (variant == negate_variant)
2412 || (variant == add_variant);
2414 for (int i = 1; i < alg.ops; i++)
2416 tree shft_log = build_int_cst (multtype, alg.log[i]);
2417 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2418 tree tmp_var = NULL_TREE;
2420 switch (alg.op[i])
2422 case alg_shift:
2423 if (synth_shift_p)
2424 stmt
2425 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2426 stmt_vinfo);
2427 else
2428 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2429 shft_log);
2430 break;
2431 case alg_add_t_m2:
2432 tmp_var
2433 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2434 stmt_vinfo, synth_shift_p);
2435 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2436 tmp_var);
2437 break;
2438 case alg_sub_t_m2:
2439 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2440 shft_log, stmt_vinfo,
2441 synth_shift_p);
2442 /* In some algorithms the first step involves zeroing the
2443 accumulator. If subtracting from such an accumulator
2444 just emit the negation directly. */
2445 if (integer_zerop (accumulator))
2446 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2447 else
2448 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2449 tmp_var);
2450 break;
2451 case alg_add_t2_m:
2452 tmp_var
2453 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2454 stmt_vinfo, synth_shift_p);
2455 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2456 break;
2457 case alg_sub_t2_m:
2458 tmp_var
2459 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2460 stmt_vinfo, synth_shift_p);
2461 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2462 break;
2463 case alg_add_factor:
2464 tmp_var
2465 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2466 stmt_vinfo, synth_shift_p);
2467 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2468 tmp_var);
2469 break;
2470 case alg_sub_factor:
2471 tmp_var
2472 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2473 stmt_vinfo, synth_shift_p);
2474 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2475 accumulator);
2476 break;
2477 default:
2478 gcc_unreachable ();
2480 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2481 but rather return it directly. */
2483 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2484 append_pattern_def_seq (stmt_vinfo, stmt);
2485 accumulator = accum_tmp;
2487 if (variant == negate_variant)
2489 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2490 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2491 accumulator = accum_tmp;
2492 if (cast_to_unsigned_p)
2493 append_pattern_def_seq (stmt_vinfo, stmt);
2495 else if (variant == add_variant)
2497 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2498 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2499 accumulator = accum_tmp;
2500 if (cast_to_unsigned_p)
2501 append_pattern_def_seq (stmt_vinfo, stmt);
2503 /* Move back to a signed type if needed. */
2504 if (cast_to_unsigned_p)
2506 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2507 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2510 return stmt;
2513 /* Detect multiplication by constant and convert it into a sequence of
2514 shifts, additions, subtractions and negations. We reuse the
2515 choose_mult_variant algorithms from expmed.c.
2517 Input/Output:
2519 STMTS: Contains a stmt from which the pattern search begins,
2520 i.e. the mult stmt.
2522 Output:
2524 * TYPE_IN: The type of the input arguments to the pattern.
2526 * TYPE_OUT: The type of the output of this pattern.
2528 * Return value: A new stmt that will be used to replace
2529 the multiplication. */
2531 static gimple *
2532 vect_recog_mult_pattern (vec<gimple *> *stmts,
2533 tree *type_in, tree *type_out)
2535 gimple *last_stmt = stmts->pop ();
2536 tree oprnd0, oprnd1, vectype, itype;
2537 gimple *pattern_stmt;
2538 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2540 if (!is_gimple_assign (last_stmt))
2541 return NULL;
2543 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2544 return NULL;
2546 oprnd0 = gimple_assign_rhs1 (last_stmt);
2547 oprnd1 = gimple_assign_rhs2 (last_stmt);
2548 itype = TREE_TYPE (oprnd0);
2550 if (TREE_CODE (oprnd0) != SSA_NAME
2551 || TREE_CODE (oprnd1) != INTEGER_CST
2552 || !INTEGRAL_TYPE_P (itype)
2553 || !type_has_mode_precision_p (itype))
2554 return NULL;
2556 vectype = get_vectype_for_scalar_type (itype);
2557 if (vectype == NULL_TREE)
2558 return NULL;
2560 /* If the target can handle vectorized multiplication natively,
2561 don't attempt to optimize this. */
2562 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2563 if (mul_optab != unknown_optab)
2565 machine_mode vec_mode = TYPE_MODE (vectype);
2566 int icode = (int) optab_handler (mul_optab, vec_mode);
2567 if (icode != CODE_FOR_nothing)
2568 return NULL;
2571 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2572 if (!pattern_stmt)
2573 return NULL;
2575 /* Pattern detected. */
2576 if (dump_enabled_p ())
2577 dump_printf_loc (MSG_NOTE, vect_location,
2578 "vect_recog_mult_pattern: detected:\n");
2580 if (dump_enabled_p ())
2581 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2582 pattern_stmt, 0);
2584 stmts->safe_push (last_stmt);
2585 *type_in = vectype;
2586 *type_out = vectype;
2588 return pattern_stmt;
2591 /* Detect a signed division by a constant that wouldn't be
2592 otherwise vectorized:
2594 type a_t, b_t;
2596 S1 a_t = b_t / N;
2598 where type 'type' is an integral type and N is a constant.
2600 Similarly handle modulo by a constant:
2602 S4 a_t = b_t % N;
2604 Input/Output:
2606 * STMTS: Contains a stmt from which the pattern search begins,
2607 i.e. the division stmt. If N is a power of two constant
2608 and type is signed, S1 is replaced by:
2609 S3 y_t = b_t < 0 ? N - 1 : 0;
2610 S2 x_t = b_t + y_t;
2611 S1' a_t = x_t >> log2 (N);
2613 Similarly, if N is a power of two constant and type is signed,
2614 S4 is replaced by (where *_T temporaries have unsigned type):
2615 S9 y_T = b_t < 0 ? -1U : 0U;
2616 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2617 S7 z_t = (type) z_T;
2618 S6 w_t = b_t + z_t;
2619 S5 x_t = w_t & (N - 1);
2620 S4' a_t = x_t - z_t;
2622 Output:
2624 * TYPE_IN: The type of the input arguments to the pattern.
2626 * TYPE_OUT: The type of the output of this pattern.
2628 * Return value: A new stmt that will be used to replace the division
2629 S1 or modulo S4 stmt. */
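/* Illustrative worked example (not from the sources), signed b_t / 4:

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   e.g. b_t == -7 gives x_t == -4 and a_t == -1, matching C's
   truncating division semantics.  */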
2631 static gimple *
2632 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2633 tree *type_in, tree *type_out)
2635 gimple *last_stmt = stmts->pop ();
2636 tree oprnd0, oprnd1, vectype, itype, cond;
2637 gimple *pattern_stmt, *def_stmt;
2638 enum tree_code rhs_code;
2639 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2640 vec_info *vinfo = stmt_vinfo->vinfo;
2641 optab optab;
2642 tree q;
2643 int dummy_int, prec;
2644 stmt_vec_info def_stmt_vinfo;
2646 if (!is_gimple_assign (last_stmt))
2647 return NULL;
2649 rhs_code = gimple_assign_rhs_code (last_stmt);
2650 switch (rhs_code)
2652 case TRUNC_DIV_EXPR:
2653 case TRUNC_MOD_EXPR:
2654 break;
2655 default:
2656 return NULL;
2659 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2660 return NULL;
2662 oprnd0 = gimple_assign_rhs1 (last_stmt);
2663 oprnd1 = gimple_assign_rhs2 (last_stmt);
2664 itype = TREE_TYPE (oprnd0);
2665 if (TREE_CODE (oprnd0) != SSA_NAME
2666 || TREE_CODE (oprnd1) != INTEGER_CST
2667 || TREE_CODE (itype) != INTEGER_TYPE
2668 || !type_has_mode_precision_p (itype))
2669 return NULL;
2671 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2672 vectype = get_vectype_for_scalar_type (itype);
2673 if (vectype == NULL_TREE)
2674 return NULL;
2676 /* If the target can handle vectorized division or modulo natively,
2677 don't attempt to optimize this. */
2678 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2679 if (optab != unknown_optab)
2681 machine_mode vec_mode = TYPE_MODE (vectype);
2682 int icode = (int) optab_handler (optab, vec_mode);
2683 if (icode != CODE_FOR_nothing)
2684 return NULL;
2687 prec = TYPE_PRECISION (itype);
2688 if (integer_pow2p (oprnd1))
2690 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2691 return NULL;
2693 /* Pattern detected. */
2694 if (dump_enabled_p ())
2695 dump_printf_loc (MSG_NOTE, vect_location,
2696 "vect_recog_divmod_pattern: detected:\n");
2698 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2699 build_int_cst (itype, 0));
2700 if (rhs_code == TRUNC_DIV_EXPR)
2702 tree var = vect_recog_temp_ssa_var (itype, NULL);
2703 tree shift;
2704 def_stmt
2705 = gimple_build_assign (var, COND_EXPR, cond,
2706 fold_build2 (MINUS_EXPR, itype, oprnd1,
2707 build_int_cst (itype, 1)),
2708 build_int_cst (itype, 0));
2709 new_pattern_def_seq (stmt_vinfo, def_stmt);
2710 var = vect_recog_temp_ssa_var (itype, NULL);
2711 def_stmt
2712 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2713 gimple_assign_lhs (def_stmt));
2714 append_pattern_def_seq (stmt_vinfo, def_stmt);
2716 shift = build_int_cst (itype, tree_log2 (oprnd1));
2717 pattern_stmt
2718 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2719 RSHIFT_EXPR, var, shift);
2721 else
2723 tree signmask;
2724 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2725 if (compare_tree_int (oprnd1, 2) == 0)
2727 signmask = vect_recog_temp_ssa_var (itype, NULL);
2728 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2729 build_int_cst (itype, 1),
2730 build_int_cst (itype, 0));
2731 append_pattern_def_seq (stmt_vinfo, def_stmt);
2733 else
2735 tree utype
2736 = build_nonstandard_integer_type (prec, 1);
2737 tree vecutype = get_vectype_for_scalar_type (utype);
2738 tree shift
2739 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2740 - tree_log2 (oprnd1));
2741 tree var = vect_recog_temp_ssa_var (utype, NULL);
2743 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2744 build_int_cst (utype, -1),
2745 build_int_cst (utype, 0));
2746 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2747 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2748 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2749 append_pattern_def_seq (stmt_vinfo, def_stmt);
2750 var = vect_recog_temp_ssa_var (utype, NULL);
2751 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2752 gimple_assign_lhs (def_stmt),
2753 shift);
2754 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2755 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2756 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2757 append_pattern_def_seq (stmt_vinfo, def_stmt);
2758 signmask = vect_recog_temp_ssa_var (itype, NULL);
2759 def_stmt
2760 = gimple_build_assign (signmask, NOP_EXPR, var);
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2763 def_stmt
2764 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2765 PLUS_EXPR, oprnd0, signmask);
2766 append_pattern_def_seq (stmt_vinfo, def_stmt);
2767 def_stmt
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2769 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2770 fold_build2 (MINUS_EXPR, itype, oprnd1,
2771 build_int_cst (itype, 1)));
2772 append_pattern_def_seq (stmt_vinfo, def_stmt);
2774 pattern_stmt
2775 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2776 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2777 signmask);
2780 if (dump_enabled_p ())
2781 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2782 0);
2784 stmts->safe_push (last_stmt);
2786 *type_in = vectype;
2787 *type_out = vectype;
2788 return pattern_stmt;
2791 if (prec > HOST_BITS_PER_WIDE_INT
2792 || integer_zerop (oprnd1))
2793 return NULL;
2795 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2796 return NULL;
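/* Illustrative sketch (unsigned 32-bit example, not from the sources):
   division by 3 uses the multiplier 0xAAAAAAAB == (2^33 + 1) / 3:

     q = (unsigned int) (((unsigned long long) n * 0xAAAAAAAB) >> 33);

   i.e. a highpart multiply followed by a right shift by one, the
   mh == 0, pre_shift == 0 case handled below.  */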
2798 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2800 if (TYPE_UNSIGNED (itype))
2802 unsigned HOST_WIDE_INT mh, ml;
2803 int pre_shift, post_shift;
2804 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2805 & GET_MODE_MASK (itype_mode));
2806 tree t1, t2, t3, t4;
2808 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2809 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2810 return NULL;
2812 /* Find a suitable multiplier and right shift count
2813 instead of multiplying with D. */
2814 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2816 /* If the suggested multiplier is more than SIZE bits, we can do better
2817 for even divisors, using an initial right shift. */
2818 if (mh != 0 && (d & 1) == 0)
2820 pre_shift = ctz_or_zero (d);
2821 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2822 &ml, &post_shift, &dummy_int);
2823 gcc_assert (!mh);
2825 else
2826 pre_shift = 0;
2828 if (mh != 0)
2830 if (post_shift - 1 >= prec)
2831 return NULL;
2833 /* t1 = oprnd0 h* ml;
2834 t2 = oprnd0 - t1;
2835 t3 = t2 >> 1;
2836 t4 = t1 + t3;
2837 q = t4 >> (post_shift - 1); */
2838 t1 = vect_recog_temp_ssa_var (itype, NULL);
2839 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2840 build_int_cst (itype, ml));
2841 append_pattern_def_seq (stmt_vinfo, def_stmt);
2843 t2 = vect_recog_temp_ssa_var (itype, NULL);
2844 def_stmt
2845 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2846 append_pattern_def_seq (stmt_vinfo, def_stmt);
2848 t3 = vect_recog_temp_ssa_var (itype, NULL);
2849 def_stmt
2850 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2851 append_pattern_def_seq (stmt_vinfo, def_stmt);
2853 t4 = vect_recog_temp_ssa_var (itype, NULL);
2854 def_stmt
2855 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2857 if (post_shift != 1)
2859 append_pattern_def_seq (stmt_vinfo, def_stmt);
2861 q = vect_recog_temp_ssa_var (itype, NULL);
2862 pattern_stmt
2863 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2864 build_int_cst (itype, post_shift - 1));
2866 else
2868 q = t4;
2869 pattern_stmt = def_stmt;
2872 else
2874 if (pre_shift >= prec || post_shift >= prec)
2875 return NULL;
2877 /* t1 = oprnd0 >> pre_shift;
2878 t2 = t1 h* ml;
2879 q = t2 >> post_shift; */
2880 if (pre_shift)
2882 t1 = vect_recog_temp_ssa_var (itype, NULL);
2883 def_stmt
2884 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2885 build_int_cst (NULL, pre_shift));
2886 append_pattern_def_seq (stmt_vinfo, def_stmt);
2888 else
2889 t1 = oprnd0;
2891 t2 = vect_recog_temp_ssa_var (itype, NULL);
2892 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2893 build_int_cst (itype, ml));
2895 if (post_shift)
2897 append_pattern_def_seq (stmt_vinfo, def_stmt);
2899 q = vect_recog_temp_ssa_var (itype, NULL);
2900 def_stmt
2901 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2902 build_int_cst (itype, post_shift));
2904 else
2905 q = t2;
2907 pattern_stmt = def_stmt;
2910 else
2912 unsigned HOST_WIDE_INT ml;
2913 int post_shift;
2914 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2915 unsigned HOST_WIDE_INT abs_d;
2916 bool add = false;
2917 tree t1, t2, t3, t4;
2919 /* Give up for -1. */
2920 if (d == -1)
2921 return NULL;
2923 /* Since d might be INT_MIN, we have to cast to
2924 unsigned HOST_WIDE_INT before negating to avoid
2925 undefined signed overflow. */
2926 abs_d = (d >= 0
2927 ? (unsigned HOST_WIDE_INT) d
2928 : - (unsigned HOST_WIDE_INT) d);
2930 /* n rem d = n rem -d */
2931 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2933 d = abs_d;
2934 oprnd1 = build_int_cst (itype, abs_d);
2936 else if (HOST_BITS_PER_WIDE_INT >= prec
2937 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2938 /* This case is not handled correctly below. */
2939 return NULL;
2941 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2942 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2944 add = true;
2945 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2947 if (post_shift >= prec)
2948 return NULL;
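/* Illustrative sketch (signed 32-bit, not from the sources): for d == 3
   choose_multiplier yields ml == 0x55555556 and post_shift == 0, so the
   code below degenerates to

     t1 = n h* 0x55555556;   // high half of the 64-bit product
     q  = t1 - (n >> 31);    // t4 is the sign mask of n

   the classic signed divide-by-3 expansion.  */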
2950 /* t1 = oprnd0 h* ml; */
2951 t1 = vect_recog_temp_ssa_var (itype, NULL);
2952 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2953 build_int_cst (itype, ml));
2955 if (add)
2957 /* t2 = t1 + oprnd0; */
2958 append_pattern_def_seq (stmt_vinfo, def_stmt);
2959 t2 = vect_recog_temp_ssa_var (itype, NULL);
2960 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2962 else
2963 t2 = t1;
2965 if (post_shift)
2967 /* t3 = t2 >> post_shift; */
2968 append_pattern_def_seq (stmt_vinfo, def_stmt);
2969 t3 = vect_recog_temp_ssa_var (itype, NULL);
2970 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2971 build_int_cst (itype, post_shift));
2973 else
2974 t3 = t2;
2976 wide_int oprnd0_min, oprnd0_max;
2977 int msb = 1;
2978 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2980 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2981 msb = 0;
2982 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2983 msb = -1;
2986 if (msb == 0 && d >= 0)
2988 /* q = t3; */
2989 q = t3;
2990 pattern_stmt = def_stmt;
2992 else
2994 /* t4 = oprnd0 >> (prec - 1);
2995 or if we know from VRP that oprnd0 >= 0
2996 t4 = 0;
2997 or if we know from VRP that oprnd0 < 0
2998 t4 = -1; */
2999 append_pattern_def_seq (stmt_vinfo, def_stmt);
3000 t4 = vect_recog_temp_ssa_var (itype, NULL);
3001 if (msb != 1)
3002 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3003 build_int_cst (itype, msb));
3004 else
3005 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3006 build_int_cst (itype, prec - 1));
3007 append_pattern_def_seq (stmt_vinfo, def_stmt);
3009 /* q = t3 - t4; or q = t4 - t3; */
3010 q = vect_recog_temp_ssa_var (itype, NULL);
3011 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3012 d < 0 ? t3 : t4);
3016 if (rhs_code == TRUNC_MOD_EXPR)
3018 tree r, t1;
3020 /* We divided. Now finish by:
3021 t1 = q * oprnd1;
3022 r = oprnd0 - t1; */
3023 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3025 t1 = vect_recog_temp_ssa_var (itype, NULL);
3026 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3027 append_pattern_def_seq (stmt_vinfo, def_stmt);
3029 r = vect_recog_temp_ssa_var (itype, NULL);
3030 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3033 /* Pattern detected. */
3034 if (dump_enabled_p ())
3036 dump_printf_loc (MSG_NOTE, vect_location,
3037 "vect_recog_divmod_pattern: detected: ");
3038 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3041 stmts->safe_push (last_stmt);
3043 *type_in = vectype;
3044 *type_out = vectype;
3045 return pattern_stmt;
3048 /* Function vect_recog_mixed_size_cond_pattern
3050 Try to find the following pattern:
3052 type x_t, y_t;
3053 TYPE a_T, b_T, c_T;
3054 loop:
3055 S1 a_T = x_t CMP y_t ? b_T : c_T;
3057 where type 'TYPE' is an integral type which has different size
3058 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3059 than 'type', the constants need to fit into an integer type
3060 with the same width as 'type') or results of conversion from 'type'.
3062 Input:
3064 * LAST_STMT: A stmt from which the pattern search begins.
3066 Output:
3068 * TYPE_IN: The type of the input arguments to the pattern.
3070 * TYPE_OUT: The type of the output of this pattern.
3072 * Return value: A new stmt that will be used to replace the pattern.
3073 Additionally a def_stmt is added.
3075 a_it = x_t CMP y_t ? b_it : c_it;
3076 a_T = (TYPE) a_it; */
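/* Illustrative sketch (hypothetical input): the pattern fires e.g. for

     signed char x[N];
     int a[N];
     for (int i = 0; i < n; i++)
       a[i] = x[i] < 0 ? -1 : 1;

   where the comparison is done on chars but the selected values are
   ints; the COND_EXPR is rewritten to select char-sized values and a
   widening conversion to int is added afterwards.  */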
3078 static gimple *
3079 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3080 tree *type_out)
3082 gimple *last_stmt = (*stmts)[0];
3083 tree cond_expr, then_clause, else_clause;
3084 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3085 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3086 gimple *pattern_stmt, *def_stmt;
3087 vec_info *vinfo = stmt_vinfo->vinfo;
3088 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3089 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3090 bool promotion;
3091 tree comp_scalar_type;
3093 if (!is_gimple_assign (last_stmt)
3094 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3095 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3096 return NULL;
3098 cond_expr = gimple_assign_rhs1 (last_stmt);
3099 then_clause = gimple_assign_rhs2 (last_stmt);
3100 else_clause = gimple_assign_rhs3 (last_stmt);
3102 if (!COMPARISON_CLASS_P (cond_expr))
3103 return NULL;
3105 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3106 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3107 if (comp_vectype == NULL_TREE)
3108 return NULL;
3110 type = gimple_expr_type (last_stmt);
3111 if (types_compatible_p (type, comp_scalar_type)
3112 || ((TREE_CODE (then_clause) != INTEGER_CST
3113 || TREE_CODE (else_clause) != INTEGER_CST)
3114 && !INTEGRAL_TYPE_P (comp_scalar_type))
3115 || !INTEGRAL_TYPE_P (type))
3116 return NULL;
3118 if ((TREE_CODE (then_clause) != INTEGER_CST
3119 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3120 &def_stmt0, &promotion))
3121 || (TREE_CODE (else_clause) != INTEGER_CST
3122 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3123 &def_stmt1, &promotion)))
3124 return NULL;
3126 if (orig_type0 && orig_type1
3127 && !types_compatible_p (orig_type0, orig_type1))
3128 return NULL;
3130 if (orig_type0)
3132 if (!types_compatible_p (orig_type0, comp_scalar_type))
3133 return NULL;
3134 then_clause = gimple_assign_rhs1 (def_stmt0);
3135 itype = orig_type0;
3138 if (orig_type1)
3140 if (!types_compatible_p (orig_type1, comp_scalar_type))
3141 return NULL;
3142 else_clause = gimple_assign_rhs1 (def_stmt1);
3143 itype = orig_type1;
3147 HOST_WIDE_INT cmp_mode_size
3148 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3150 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3151 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3152 return NULL;
3154 vectype = get_vectype_for_scalar_type (type);
3155 if (vectype == NULL_TREE)
3156 return NULL;
3158 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3159 return NULL;
3161 if (itype == NULL_TREE)
3162 itype = build_nonstandard_integer_type (cmp_mode_size,
3163 TYPE_UNSIGNED (type));
3165 if (itype == NULL_TREE
3166 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3167 return NULL;
3169 vecitype = get_vectype_for_scalar_type (itype);
3170 if (vecitype == NULL_TREE)
3171 return NULL;
3173 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3174 return NULL;
3176 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3178 if ((TREE_CODE (then_clause) == INTEGER_CST
3179 && !int_fits_type_p (then_clause, itype))
3180 || (TREE_CODE (else_clause) == INTEGER_CST
3181 && !int_fits_type_p (else_clause, itype)))
3182 return NULL;
3185 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3186 COND_EXPR, unshare_expr (cond_expr),
3187 fold_convert (itype, then_clause),
3188 fold_convert (itype, else_clause));
3189 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3190 NOP_EXPR, gimple_assign_lhs (def_stmt));
3192 new_pattern_def_seq (stmt_vinfo, def_stmt);
3193 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3194 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3195 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3196 *type_in = vecitype;
3197 *type_out = vectype;
3199 if (dump_enabled_p ())
3200 dump_printf_loc (MSG_NOTE, vect_location,
3201 "vect_recog_mixed_size_cond_pattern: detected:\n");
3203 return pattern_stmt;
3207 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
3208 true if bool VAR can and should be optimized that way. Assume it shouldn't
3209 in case it's a result of a comparison which can be directly vectorized into
3210 a vector comparison. Fills in STMTS with all stmts visited during the
3211 walk. */
3213 static bool
3214 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3216 gimple *def_stmt;
3217 enum vect_def_type dt;
3218 tree rhs1;
3219 enum tree_code rhs_code;
3221 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3222 return false;
3224 if (dt != vect_internal_def)
3225 return false;
3227 if (!is_gimple_assign (def_stmt))
3228 return false;
3230 if (stmts.contains (def_stmt))
3231 return true;
3233 rhs1 = gimple_assign_rhs1 (def_stmt);
3234 rhs_code = gimple_assign_rhs_code (def_stmt);
3235 switch (rhs_code)
3237 case SSA_NAME:
3238 if (! check_bool_pattern (rhs1, vinfo, stmts))
3239 return false;
3240 break;
3242 CASE_CONVERT:
3243 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3244 return false;
3245 if (! check_bool_pattern (rhs1, vinfo, stmts))
3246 return false;
3247 break;
3249 case BIT_NOT_EXPR:
3250 if (! check_bool_pattern (rhs1, vinfo, stmts))
3251 return false;
3252 break;
3254 case BIT_AND_EXPR:
3255 case BIT_IOR_EXPR:
3256 case BIT_XOR_EXPR:
3257 if (! check_bool_pattern (rhs1, vinfo, stmts)
3258 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3259 return false;
3260 break;
3262 default:
3263 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3265 tree vecitype, comp_vectype;
3267 /* If the comparison can throw, then is_gimple_condexpr will be
3268 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3269 if (stmt_could_throw_p (def_stmt))
3270 return false;
3272 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3273 if (comp_vectype == NULL_TREE)
3274 return false;
3276 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3277 if (mask_type
3278 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3279 return false;
3281 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3283 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3284 tree itype
3285 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3286 vecitype = get_vectype_for_scalar_type (itype);
3287 if (vecitype == NULL_TREE)
3288 return false;
3290 else
3291 vecitype = comp_vectype;
3292 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3293 return false;
3295 else
3296 return false;
3297 break;
3300 bool res = stmts.add (def_stmt);
3301 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3302 gcc_assert (!res);
3304 return true;
3308 /* Helper function of adjust_bool_pattern. Cast VAR to TYPE via a
3309 NOP_EXPR stmt, append that stmt to STMT_INFO's pattern def
3310 sequence and return the resulting SSA name. */
3312 static tree
3313 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3315 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3316 NOP_EXPR, var);
3317 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3318 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3319 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3320 append_pattern_def_seq (stmt_info, cast_stmt);
3321 return gimple_assign_lhs (cast_stmt);
3324 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3325 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3326 type, OUT_TYPE is the desired final integer type of the whole pattern.
3327 STMT_INFO is the info of the pattern root, which the pattern stmts
3328 should be associated with. DEFS is a map of pattern defs. */
3330 static void
3331 adjust_bool_pattern (tree var, tree out_type,
3332 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3334 gimple *stmt = SSA_NAME_DEF_STMT (var);
3335 enum tree_code rhs_code, def_rhs_code;
3336 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3337 location_t loc;
3338 gimple *pattern_stmt, *def_stmt;
3339 tree trueval = NULL_TREE;
3341 rhs1 = gimple_assign_rhs1 (stmt);
3342 rhs2 = gimple_assign_rhs2 (stmt);
3343 rhs_code = gimple_assign_rhs_code (stmt);
3344 loc = gimple_location (stmt);
3345 switch (rhs_code)
3347 case SSA_NAME:
3348 CASE_CONVERT:
3349 irhs1 = *defs.get (rhs1);
3350 itype = TREE_TYPE (irhs1);
3351 pattern_stmt
3352 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3353 SSA_NAME, irhs1);
3354 break;
3356 case BIT_NOT_EXPR:
3357 irhs1 = *defs.get (rhs1);
3358 itype = TREE_TYPE (irhs1);
3359 pattern_stmt
3360 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3361 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3362 break;
3364 case BIT_AND_EXPR:
3365 /* Try to optimize x = y & (a < b ? 1 : 0); into
3366 x = (a < b ? y : 0);
3368 E.g. for:
3369 bool a_b, b_b, c_b;
3370 TYPE d_T;
3372 S1 a_b = x1 CMP1 y1;
3373 S2 b_b = x2 CMP2 y2;
3374 S3 c_b = a_b & b_b;
3375 S4 d_T = (TYPE) c_b;
3377 we would normally emit:
3379 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3380 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3381 S3' c_T = a_T & b_T;
3382 S4' d_T = c_T;
3384 but we can save one stmt by using the
3385 result of one of the COND_EXPRs in the other COND_EXPR and leave
3386 BIT_AND_EXPR stmt out:
3388 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3389 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3390 S4' f_T = c_T;
3392 At least when VEC_COND_EXPR is implemented using masks,
3393 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases the
3394 comparison mask is computed and then ANDed, in one case with an
3395 all-ones vector, in the other case with a vector register.
3396 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3397 often more expensive. */
3398 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3399 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3400 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3402 irhs1 = *defs.get (rhs1);
3403 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3404 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3405 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3407 rhs_code = def_rhs_code;
3408 rhs1 = def_rhs1;
3409 rhs2 = gimple_assign_rhs2 (def_stmt);
3410 trueval = irhs1;
3411 goto do_compare;
3413 else
3414 irhs2 = *defs.get (rhs2);
3415 goto and_ior_xor;
3417 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3418 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3419 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3421 irhs2 = *defs.get (rhs2);
3422 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3423 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3424 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3426 rhs_code = def_rhs_code;
3427 rhs1 = def_rhs1;
3428 rhs2 = gimple_assign_rhs2 (def_stmt);
3429 trueval = irhs2;
3430 goto do_compare;
3432 else
3433 irhs1 = *defs.get (rhs1);
3434 goto and_ior_xor;
3436 /* FALLTHRU */
3437 case BIT_IOR_EXPR:
3438 case BIT_XOR_EXPR:
3439 irhs1 = *defs.get (rhs1);
3440 irhs2 = *defs.get (rhs2);
3441 and_ior_xor:
3442 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3443 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3445 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3446 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3447 int out_prec = TYPE_PRECISION (out_type);
3448 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3449 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3450 stmt_info);
3451 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3452 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3453 stmt_info);
3454 else
3456 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3457 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3460 itype = TREE_TYPE (irhs1);
3461 pattern_stmt
3462 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3463 rhs_code, irhs1, irhs2);
3464 break;
3466 default:
3467 do_compare:
3468 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3469 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3470 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3471 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3472 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3474 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3475 itype
3476 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3478 else
3479 itype = TREE_TYPE (rhs1);
3480 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3481 if (trueval == NULL_TREE)
3482 trueval = build_int_cst (itype, 1);
3483 else
3484 gcc_checking_assert (useless_type_conversion_p (itype,
3485 TREE_TYPE (trueval)));
3486 pattern_stmt
3487 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3488 COND_EXPR, cond_expr, trueval,
3489 build_int_cst (itype, 0));
3490 break;
3493 gimple_set_location (pattern_stmt, loc);
3494 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3495 pattern def seq stmts instead of just letting auto-detection do
3496 its work? */
3497 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3498 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3499 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3500 append_pattern_def_seq (stmt_info, pattern_stmt);
3501 defs.put (var, gimple_assign_lhs (pattern_stmt));
3504 /* Comparison function for qsort, ordering a vector of gimple stmts by UID. */
3506 static int
3507 sort_after_uid (const void *p1, const void *p2)
3509 const gimple *stmt1 = *(const gimple * const *)p1;
3510 const gimple *stmt2 = *(const gimple * const *)p2;
3511 return gimple_uid (stmt1) - gimple_uid (stmt2);
3514 /* Create pattern stmts for all stmts participating in the bool pattern
3515 specified by BOOL_STMT_SET and its root STMT with the desired type
3516 OUT_TYPE. Return the def of the pattern root. */
3518 static tree
3519 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3520 tree out_type, gimple *stmt)
3522 /* Gather original stmts in the bool pattern in their order of appearance
3523 in the IL. */
3524 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3525 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3526 i != bool_stmt_set.end (); ++i)
3527 bool_stmts.quick_push (*i);
3528 bool_stmts.qsort (sort_after_uid);
3530 /* Now process them in that order, producing pattern stmts. */
3531 hash_map <tree, tree> defs;
3532 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3533 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3534 out_type, vinfo_for_stmt (stmt), defs);
3536 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3537 gimple *pattern_stmt
3538 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3539 return gimple_assign_lhs (pattern_stmt);
3542 /* Helper for search_type_for_mask. */
3544 static tree
3545 search_type_for_mask_1 (tree var, vec_info *vinfo,
3546 hash_map<gimple *, tree> &cache)
3548 gimple *def_stmt;
3549 enum vect_def_type dt;
3550 tree rhs1;
3551 enum tree_code rhs_code;
3552 tree res = NULL_TREE, res2;
3554 if (TREE_CODE (var) != SSA_NAME)
3555 return NULL_TREE;
3557 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3558 return NULL_TREE;
3560 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3561 return NULL_TREE;
3563 if (dt != vect_internal_def)
3564 return NULL_TREE;
3566 if (!is_gimple_assign (def_stmt))
3567 return NULL_TREE;
3569 tree *c = cache.get (def_stmt);
3570 if (c)
3571 return *c;
3573 rhs_code = gimple_assign_rhs_code (def_stmt);
3574 rhs1 = gimple_assign_rhs1 (def_stmt);
3576 switch (rhs_code)
3578 case SSA_NAME:
3579 case BIT_NOT_EXPR:
3580 CASE_CONVERT:
3581 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3582 break;
3584 case BIT_AND_EXPR:
3585 case BIT_IOR_EXPR:
3586 case BIT_XOR_EXPR:
3587 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3588 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3589 cache);
3590 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3591 res = res2;
3592 break;
3594 default:
3595 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3597 tree comp_vectype, mask_type;
3599 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3601 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3602 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3603 vinfo, cache);
3604 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3605 res = res2;
3606 break;
3609 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3610 if (comp_vectype == NULL_TREE)
3612 res = NULL_TREE;
3613 break;
3616 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3617 if (!mask_type
3618 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3620 res = NULL_TREE;
3621 break;
3624 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3625 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3627 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3628 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3630 else
3631 res = TREE_TYPE (rhs1);
3635 cache.put (def_stmt, res);
3636 return res;
3639 /* Return the proper type for converting bool VAR into
3640 an integer value or NULL_TREE if no such type exists.
3641 The type is chosen so that the converted value has the
3642 same number of elements as VAR's vector type. */
3644 static tree
3645 search_type_for_mask (tree var, vec_info *vinfo)
3647 hash_map<gimple *, tree> cache;
3648 return search_type_for_mask_1 (var, vinfo, cache);
3651 /* Function vect_recog_bool_pattern
3653 Try to find a pattern like the following:
3655 bool a_b, b_b, c_b, d_b, e_b;
3656 TYPE f_T;
3657 loop:
3658 S1 a_b = x1 CMP1 y1;
3659 S2 b_b = x2 CMP2 y2;
3660 S3 c_b = a_b & b_b;
3661 S4 d_b = x3 CMP3 y3;
3662 S5 e_b = c_b | d_b;
3663 S6 f_T = (TYPE) e_b;
3665 where type 'TYPE' is an integral type. Or a similar pattern
3666 ending in
3668 S6 f_Y = e_b ? r_Y : s_Y;
3670 as results from if-conversion of a complex condition.
3672 Input:
3674 * LAST_STMT: A stmt at the end from which the pattern
3675 search begins, i.e. cast of a bool to
3676 an integer type.
3678 Output:
3680 * TYPE_IN: The type of the input arguments to the pattern.
3682 * TYPE_OUT: The type of the output of this pattern.
3684 * Return value: A new stmt that will be used to replace the pattern.
3686 Assuming the size of TYPE is the same as the size of all comparisons
3687 (otherwise some casts would be added where needed), for the above
3688 sequence we create the related pattern stmts:
3689 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3690 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3691 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3692 S5' e_T = c_T | d_T;
3693 S6' f_T = e_T;
3695 Instead of the above S3' we could emit:
3696 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3697 S3' c_T = a_T | b_T;
3698 but the above is more efficient. */
3700 static gimple *
3701 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3702 tree *type_out)
3704 gimple *last_stmt = stmts->pop ();
3705 enum tree_code rhs_code;
3706 tree var, lhs, rhs, vectype;
3707 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3708 stmt_vec_info new_stmt_info;
3709 vec_info *vinfo = stmt_vinfo->vinfo;
3710 gimple *pattern_stmt;
3712 if (!is_gimple_assign (last_stmt))
3713 return NULL;
3715 var = gimple_assign_rhs1 (last_stmt);
3716 lhs = gimple_assign_lhs (last_stmt);
3718 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3719 return NULL;
3721 hash_set<gimple *> bool_stmts;
3723 rhs_code = gimple_assign_rhs_code (last_stmt);
3724 if (CONVERT_EXPR_CODE_P (rhs_code))
3726 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3727 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3728 return NULL;
3729 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3730 if (vectype == NULL_TREE)
3731 return NULL;
3733 if (check_bool_pattern (var, vinfo, bool_stmts))
3735 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3736 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3737 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3738 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3739 else
3740 pattern_stmt
3741 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3743 else
3745 tree type = search_type_for_mask (var, vinfo);
3746 tree cst0, cst1, tmp;
3748 if (!type)
3749 return NULL;
3751 /* We may directly use the cond with a narrowed type, avoiding
3752 multiple cond exprs and the subsequent result packing, and
3753 perform a single cond with a packed mask instead. In case
3754 of widening it is better to emit the cond first and then
3755 extract the results. */
3756 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3757 type = TREE_TYPE (lhs);
3759 cst0 = build_int_cst (type, 0);
3760 cst1 = build_int_cst (type, 1);
3761 tmp = vect_recog_temp_ssa_var (type, NULL);
3762 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3764 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3766 tree new_vectype = get_vectype_for_scalar_type (type);
3767 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3768 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3769 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3770 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3772 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3773 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3777 *type_out = vectype;
3778 *type_in = vectype;
3779 stmts->safe_push (last_stmt);
3780 if (dump_enabled_p ())
3781 dump_printf_loc (MSG_NOTE, vect_location,
3782 "vect_recog_bool_pattern: detected:\n");
3784 return pattern_stmt;
3786 else if (rhs_code == COND_EXPR
3787 && TREE_CODE (var) == SSA_NAME)
3789 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3790 if (vectype == NULL_TREE)
3791 return NULL;
3793 /* Build a scalar type for the boolean result that when
3794 vectorized matches the vector type of the result in
3795 size and number of elements. */
3796 unsigned prec
3797 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3798 TYPE_VECTOR_SUBPARTS (vectype));
3800 tree type
3801 = build_nonstandard_integer_type (prec,
3802 TYPE_UNSIGNED (TREE_TYPE (var)));
3803 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3804 return NULL;
3806 if (!check_bool_pattern (var, vinfo, bool_stmts))
3807 return NULL;
3809 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3811 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3812 pattern_stmt
3813 = gimple_build_assign (lhs, COND_EXPR,
3814 build2 (NE_EXPR, boolean_type_node,
3815 rhs, build_int_cst (type, 0)),
3816 gimple_assign_rhs2 (last_stmt),
3817 gimple_assign_rhs3 (last_stmt));
3818 *type_out = vectype;
3819 *type_in = vectype;
3820 stmts->safe_push (last_stmt);
3821 if (dump_enabled_p ())
3822 dump_printf_loc (MSG_NOTE, vect_location,
3823 "vect_recog_bool_pattern: detected:\n");
3825 return pattern_stmt;
3827 else if (rhs_code == SSA_NAME
3828 && STMT_VINFO_DATA_REF (stmt_vinfo))
3830 stmt_vec_info pattern_stmt_info;
3831 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3832 gcc_assert (vectype != NULL_TREE);
3833 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3834 return NULL;
3836 if (check_bool_pattern (var, vinfo, bool_stmts))
3837 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3838 else
3840 tree type = search_type_for_mask (var, vinfo);
3841 tree cst0, cst1, new_vectype;
3843 if (!type)
3844 return NULL;
3846 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3847 type = TREE_TYPE (vectype);
3849 cst0 = build_int_cst (type, 0);
3850 cst1 = build_int_cst (type, 1);
3851 new_vectype = get_vectype_for_scalar_type (type);
3853 rhs = vect_recog_temp_ssa_var (type, NULL);
3854 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3856 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3857 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3858 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3859 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3862 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3863 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3865 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3866 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3867 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3868 rhs = rhs2;
3870 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3871 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3872 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3873 STMT_VINFO_DATA_REF (pattern_stmt_info)
3874 = STMT_VINFO_DATA_REF (stmt_vinfo);
3875 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3876 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3877 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3878 *type_out = vectype;
3879 *type_in = vectype;
3880 stmts->safe_push (last_stmt);
3881 if (dump_enabled_p ())
3882 dump_printf_loc (MSG_NOTE, vect_location,
3883 "vect_recog_bool_pattern: detected:\n");
3884 return pattern_stmt;
3886 else
3887 return NULL;
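/* As an illustrative sketch of the third case above (a bool stored to
   memory; the SSA names are hypothetical), for

     _Bool b_1;
     ...
     b_1 = x_2 < y_3;
     a[i_4] = b_1;

   the store would be replaced by pattern statements along the lines of

     tmp_5 = x_2 < y_3 ? 1 : 0;
     a[i_4] = tmp_5;

   so that the stored value has a regular integer vector type instead
   of a mask type.  */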
3891 /* A helper for vect_recog_mask_conversion_pattern. Build
3892 conversion of MASK to a type suitable for masking VECTYPE.
3893 The built statement gets the required vectype and is appended
3894 to the pattern definition sequence of STMT_VINFO.
3896 Return converted mask. */
3898 static tree
3899 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3900 vec_info *vinfo)
3902 gimple *stmt;
3903 tree masktype, tmp;
3904 stmt_vec_info new_stmt_info;
3906 masktype = build_same_sized_truth_vector_type (vectype);
3907 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3908 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3909 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3910 set_vinfo_for_stmt (stmt, new_stmt_info);
3911 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3912 append_pattern_def_seq (stmt_vinfo, stmt);
3914 return tmp;
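/* For instance (a sketch with hypothetical names): if MASK currently
   has the mask type associated with 4-byte elements and VECTYPE is a
   vector of 2-byte elements, the helper emits

     tmp_1 = (<mask type for 2-byte elements>) mask_2;

   records the new mask vector type on that statement, appends it to
   the pattern definition sequence and returns tmp_1 for use in place
   of MASK.  */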
3918 /* Function vect_recog_mask_conversion_pattern
3920 Try to find statements which require boolean type
3921 conversion. Additional conversion statements are
3922 added to handle such cases. For example:
3924 bool m_1, m_2, m_3;
3925 int i_4, i_5;
3926 double d_6, d_7;
3927 char c_1, c_2, c_3;
3929 S1 m_1 = i_4 > i_5;
3930 S2 m_2 = d_6 < d_7;
3931 S3 m_3 = m_1 & m_2;
3932 S4 c_1 = m_3 ? c_2 : c_3;
3934 Will be transformed into:
3936 S1 m_1 = i_4 > i_5;
3937 S2 m_2 = d_6 < d_7;
3938 S3'' m_2' = (_Bool[bitsize=32])m_2
3939 S3' m_3' = m_1 & m_2';
3940 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3941 S4' c_1' = m_3'' ? c_2 : c_3; */
3943 static gimple *
3944 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3945 tree *type_out)
3947 gimple *last_stmt = stmts->pop ();
3948 enum tree_code rhs_code;
3949 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3950 tree vectype1, vectype2;
3951 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3952 stmt_vec_info pattern_stmt_info;
3953 vec_info *vinfo = stmt_vinfo->vinfo;
3955 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3956 if (is_gimple_call (last_stmt)
3957 && gimple_call_internal_p (last_stmt)
3958 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3959 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3961 gcall *pattern_stmt;
3962 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3964 if (load)
3966 lhs = gimple_call_lhs (last_stmt);
3967 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3969 else
3971 rhs2 = gimple_call_arg (last_stmt, 3);
3972 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3975 rhs1 = gimple_call_arg (last_stmt, 2);
3976 rhs1_type = search_type_for_mask (rhs1, vinfo);
3977 if (!rhs1_type)
3978 return NULL;
3979 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3981 if (!vectype1 || !vectype2
3982 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3983 TYPE_VECTOR_SUBPARTS (vectype2)))
3984 return NULL;
3986 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3988 if (load)
3990 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3991 pattern_stmt
3992 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3993 gimple_call_arg (last_stmt, 0),
3994 gimple_call_arg (last_stmt, 1),
3995 tmp);
3996 gimple_call_set_lhs (pattern_stmt, lhs);
3998 else
3999 pattern_stmt
4000 = gimple_build_call_internal (IFN_MASK_STORE, 4,
4001 gimple_call_arg (last_stmt, 0),
4002 gimple_call_arg (last_stmt, 1),
4003 tmp,
4004 gimple_call_arg (last_stmt, 3));
4006 gimple_call_set_nothrow (pattern_stmt, true);
4008 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4009 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4010 STMT_VINFO_DATA_REF (pattern_stmt_info)
4011 = STMT_VINFO_DATA_REF (stmt_vinfo);
4012 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4013 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4014 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
4016 *type_out = vectype1;
4017 *type_in = vectype1;
4018 stmts->safe_push (last_stmt);
4019 if (dump_enabled_p ())
4020 dump_printf_loc (MSG_NOTE, vect_location,
4021 "vect_recog_mask_conversion_pattern: detected:\n");
4023 return pattern_stmt;
4026 if (!is_gimple_assign (last_stmt))
4027 return NULL;
4029 gimple *pattern_stmt;
4030 lhs = gimple_assign_lhs (last_stmt);
4031 rhs1 = gimple_assign_rhs1 (last_stmt);
4032 rhs_code = gimple_assign_rhs_code (last_stmt);
4034 /* Check for cond expression requiring mask conversion. */
4035 if (rhs_code == COND_EXPR)
4037 /* vect_recog_mixed_size_cond_pattern could apply.
4038 Do nothing in that case. */
4039 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4040 return NULL;
4042 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4044 if (TREE_CODE (rhs1) == SSA_NAME)
4046 rhs1_type = search_type_for_mask (rhs1, vinfo);
4047 if (!rhs1_type)
4048 return NULL;
4050 else if (COMPARISON_CLASS_P (rhs1))
4052 /* Check whether we're comparing scalar booleans and (if so)
4053 whether a better mask type exists than the mask associated
4054 with boolean-sized elements. This avoids unnecessary packs
4055 and unpacks if the booleans are set from comparisons of
4056 wider types. E.g. in:
4058 int x1, x2, x3, x4, y1, y2;
4060 bool b1 = (x1 == x2);
4061 bool b2 = (x3 == x4);
4062 ... = b1 == b2 ? y1 : y2;
4064 it is better for b1 and b2 to use the mask type associated
4065 with int elements rather than bool (byte) elements. */
4066 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4067 if (!rhs1_type)
4068 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4070 else
4071 return NULL;
4073 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4075 if (!vectype1 || !vectype2)
4076 return NULL;
4078 /* Continue if a conversion is needed. Also continue if we have
4079 a comparison whose vector type would normally be different from
4080 VECTYPE2 when considered in isolation. In that case we'll
4081 replace the comparison with an SSA name (so that we can record
4082 its vector type) and behave as though the comparison was an SSA
4083 name from the outset. */
4084 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4085 TYPE_VECTOR_SUBPARTS (vectype2))
4086 && (TREE_CODE (rhs1) == SSA_NAME
4087 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4088 return NULL;
4090 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4091 in place, we can handle it in vectorizable_condition. This avoids
4092 unnecessary promotion stmts and increased vectorization factor. */
4093 if (COMPARISON_CLASS_P (rhs1)
4094 && INTEGRAL_TYPE_P (rhs1_type)
4095 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4096 TYPE_VECTOR_SUBPARTS (vectype2)))
4098 gimple *dummy;
4099 enum vect_def_type dt;
4100 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4101 &dummy, &dt)
4102 && dt == vect_external_def
4103 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4104 &dummy, &dt)
4105 && (dt == vect_external_def
4106 || dt == vect_constant_def))
4108 tree wide_scalar_type = build_nonstandard_integer_type
4109 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4110 TYPE_UNSIGNED (rhs1_type));
4111 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4112 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4113 return NULL;
4117 /* If rhs1 is a comparison, we need to move it into a
4118 separate statement. */
4119 if (TREE_CODE (rhs1) != SSA_NAME)
4121 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4122 pattern_stmt = gimple_build_assign (tmp, rhs1);
4123 rhs1 = tmp;
4125 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4126 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4127 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4128 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4131 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4132 TYPE_VECTOR_SUBPARTS (vectype2)))
4133 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4134 else
4135 tmp = rhs1;
4137 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4138 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4139 gimple_assign_rhs2 (last_stmt),
4140 gimple_assign_rhs3 (last_stmt));
4142 *type_out = vectype1;
4143 *type_in = vectype1;
4144 stmts->safe_push (last_stmt);
4145 if (dump_enabled_p ())
4146 dump_printf_loc (MSG_NOTE, vect_location,
4147 "vect_recog_mask_conversion_pattern: detected:\n");
4149 return pattern_stmt;
4152 /* Now check for binary boolean operations requiring conversion for
4153 one of the operands. */
4154 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4155 return NULL;
4157 if (rhs_code != BIT_IOR_EXPR
4158 && rhs_code != BIT_XOR_EXPR
4159 && rhs_code != BIT_AND_EXPR
4160 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4161 return NULL;
4163 rhs2 = gimple_assign_rhs2 (last_stmt);
4165 rhs1_type = search_type_for_mask (rhs1, vinfo);
4166 rhs2_type = search_type_for_mask (rhs2, vinfo);
4168 if (!rhs1_type || !rhs2_type
4169 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4170 return NULL;
4172 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4174 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4175 if (!vectype1)
4176 return NULL;
4177 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4179 else
4181 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4182 if (!vectype1)
4183 return NULL;
4184 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4187 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4188 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4190 *type_out = vectype1;
4191 *type_in = vectype1;
4192 stmts->safe_push (last_stmt);
4193 if (dump_enabled_p ())
4194 dump_printf_loc (MSG_NOTE, vect_location,
4195 "vect_recog_mask_conversion_pattern: detected:\n");
4197 return pattern_stmt;
4200 /* STMT is a load or store. If the load or store is conditional, return
4201 the boolean condition under which it occurs, otherwise return null. */
4203 static tree
4204 vect_get_load_store_mask (gimple *stmt)
4206 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4208 gcc_assert (gimple_assign_single_p (def_assign));
4209 return NULL_TREE;
4212 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4214 internal_fn ifn = gimple_call_internal_fn (def_call);
4215 int mask_index = internal_fn_mask_index (ifn);
4216 return gimple_call_arg (def_call, mask_index);
4219 gcc_unreachable ();
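/* E.g. for a masked load such as

     x_1 = .MASK_LOAD (p_2, align_3, m_4);

   the function returns the mask m_4, whereas for an unconditional
   load or store assignment it returns NULL_TREE.  */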
4222 /* Return the scalar offset type that an internal gather/scatter function
4223 should use. GS_INFO describes the gather/scatter operation. */
4225 static tree
4226 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4228 tree offset_type = TREE_TYPE (gs_info->offset);
4229 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4231 /* Enforced by vect_check_gather_scatter. */
4232 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4233 gcc_assert (element_bits >= offset_bits);
4235 /* If the offset is narrower than the elements, extend it according
4236 to its sign. */
4237 if (element_bits > offset_bits)
4238 return build_nonstandard_integer_type (element_bits,
4239 TYPE_UNSIGNED (offset_type));
4241 return offset_type;
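/* For example: for a gather whose elements are 64 bits wide but whose
   recorded offset is a 32-bit signed integer, the function returns
   the 64-bit signed integer type, so that the offset is sign-extended
   to the element width before being used.  */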
4244 /* Return MASK if MASK is suitable for masking an operation on vectors
4245 of type VECTYPE, otherwise convert it into such a form and return
4246 the result. Associate any conversion statements with STMT_INFO's
4247 pattern. */
4249 static tree
4250 vect_convert_mask_for_vectype (tree mask, tree vectype,
4251 stmt_vec_info stmt_info, vec_info *vinfo)
4253 tree mask_type = search_type_for_mask (mask, vinfo);
4254 if (mask_type)
4256 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4257 if (mask_vectype
4258 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4259 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4260 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4262 return mask;
4265 /* Return the equivalent of:
4267 fold_convert (TYPE, VALUE)
4269 with the expectation that the operation will be vectorized.
4270 If new statements are needed, add them as pattern statements
4271 to STMT_INFO. */
4273 static tree
4274 vect_add_conversion_to_pattern (tree type, tree value,
4275 stmt_vec_info stmt_info,
4276 vec_info *vinfo)
4278 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4279 return value;
4281 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4282 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4283 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4284 set_vinfo_for_stmt (conversion, new_stmt_info);
4285 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4286 append_pattern_def_seq (stmt_info, conversion);
4287 return new_value;
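/* E.g. (illustrative, with hypothetical names): a call with TYPE being
   the 64-bit integer type and VALUE being a 32-bit int SSA name off_1
   would add the pattern statement

     tmp_2 = (long) off_1;

   and return tmp_2, whereas a call in which VALUE already has a type
   interchangeable with TYPE simply returns VALUE unchanged.  */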
4290 /* Try to convert STMT into a call to a gather load or scatter store
4291 internal function. Return the final statement on success and set
4292 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4294 This function only handles gathers and scatters that were recognized
4295 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4297 static gimple *
4298 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4299 tree *type_in, tree *type_out)
4301 /* Currently we only support this for loop vectorization. */
4302 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4303 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4304 if (!loop_vinfo)
4305 return NULL;
4307 /* Make sure that we're looking at a gather load or scatter store. */
4308 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4309 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4310 return NULL;
4312 /* Get the boolean that controls whether the load or store happens.
4313 This is null if the operation is unconditional. */
4314 tree mask = vect_get_load_store_mask (stmt);
4316 /* Make sure that the target supports an appropriate internal
4317 function for the gather/scatter operation. */
4318 gather_scatter_info gs_info;
4319 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4320 || gs_info.decl)
4321 return NULL;
4323 /* Convert the mask to the right form. */
4324 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4325 if (mask)
4326 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4327 loop_vinfo);
4329 /* Get the invariant base and non-invariant offset, converting the
4330 latter to the same width as the vector elements. */
4331 tree base = gs_info.base;
4332 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4333 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4334 last_stmt_info, loop_vinfo);
4336 /* Build the new pattern statement. */
4337 tree scale = size_int (gs_info.scale);
4338 gcall *pattern_stmt;
4339 if (DR_IS_READ (dr))
4341 if (mask != NULL)
4342 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4343 offset, scale, mask);
4344 else
4345 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4346 offset, scale);
4347 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4348 gimple_call_set_lhs (pattern_stmt, load_lhs);
4350 else
4352 tree rhs = vect_get_store_rhs (stmt);
4353 if (mask != NULL)
4354 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4355 base, offset, scale, rhs,
4356 mask);
4357 else
4358 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4359 base, offset, scale, rhs);
4361 gimple_call_set_nothrow (pattern_stmt, true);
4363 /* Copy across relevant vectorization info and associate DR with the
4364 new pattern statement instead of the original statement. */
4365 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4366 loop_vinfo);
4367 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4368 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4369 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4370 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4371 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4372 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4373 DR_STMT (dr) = pattern_stmt;
4375 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4376 *type_out = vectype;
4377 *type_in = vectype;
4379 if (dump_enabled_p ())
4380 dump_printf_loc (MSG_NOTE, vect_location,
4381 "gather/scatter pattern detected:\n");
4383 return pattern_stmt;
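/* As an illustrative sketch of the result (hypothetical names): a
   conditional gather load recognized with scale 4, as in

     if (m_1)
       x_2 = base_3[off_4];

   would be replaced by an internal-function call along the lines of

     x_5 = .MASK_GATHER_LOAD (base_3, off_4, 4, m_1);

   with the mask converted and the offset widened beforehand as
   described above.  */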
4386 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4388 static gimple *
4389 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4390 tree *type_out)
4392 gimple *last_stmt = stmts->pop ();
4393 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4394 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4395 last_stmt_info,
4396 type_in, type_out);
4397 if (pattern_stmt)
4398 stmts->safe_push (last_stmt);
4399 return pattern_stmt;
4402 /* Mark statements that are involved in a pattern. */
4404 static inline void
4405 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4406 tree pattern_vectype)
4408 stmt_vec_info pattern_stmt_info, def_stmt_info;
4409 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4410 vec_info *vinfo = orig_stmt_info->vinfo;
4411 gimple *def_stmt;
4413 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4414 if (pattern_stmt_info == NULL)
4416 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4417 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4419 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4421 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4422 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4423 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4424 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4425 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4426 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4427 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4428 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4429 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4431 gimple_stmt_iterator si;
4432 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4433 !gsi_end_p (si); gsi_next (&si))
4435 def_stmt = gsi_stmt (si);
4436 def_stmt_info = vinfo_for_stmt (def_stmt);
4437 if (def_stmt_info == NULL)
4439 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4440 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4442 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4443 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4444 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4445 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4446 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4451 /* Function vect_pattern_recog_1
4453 Input:
4454 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4455 computation pattern.
4456 SI: An iterator pointing at the stmt from which the pattern search should start.
4458 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4459 expression that computes the same functionality and can be used to
4460 replace the sequence of stmts that are involved in the pattern.
4462 Output:
4463 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4464 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4465 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4466 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4467 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4468 the available target pattern.
4470 This function also does some bookkeeping, as explained in the documentation
4471 for vect_pattern_recog. */
4473 static bool
4474 vect_pattern_recog_1 (vect_recog_func *recog_func,
4475 gimple_stmt_iterator si,
4476 vec<gimple *> *stmts_to_replace)
4478 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4479 stmt_vec_info stmt_info;
4480 loop_vec_info loop_vinfo;
4481 tree pattern_vectype;
4482 tree type_in, type_out;
4483 enum tree_code code;
4484 int i;
4485 gimple *next;
4487 stmts_to_replace->truncate (0);
4488 stmts_to_replace->quick_push (stmt);
4489 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4490 if (!pattern_stmt)
4491 return false;
4493 stmt = stmts_to_replace->last ();
4494 stmt_info = vinfo_for_stmt (stmt);
4495 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4497 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4498 || VECTOR_TYPE_P (type_in))
4500 /* No need to check target support (already checked by the pattern
4501 recognition function). */
4502 pattern_vectype = type_out ? type_out : type_in;
4504 else
4506 /* Check target support */
4507 type_in = get_vectype_for_scalar_type (type_in);
4508 if (!type_in)
4509 return false;
4510 if (type_out)
4511 type_out = get_vectype_for_scalar_type (type_out);
4512 else
4513 type_out = type_in;
4514 if (!type_out)
4515 return false;
4516 pattern_vectype = type_out;
4518 if (is_gimple_assign (pattern_stmt))
4520 enum insn_code icode;
4521 code = gimple_assign_rhs_code (pattern_stmt);
4522 optab optab = optab_for_tree_code (code, type_in, optab_default);
4523 machine_mode vec_mode = TYPE_MODE (type_in);
4524 if (!optab
4525 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4526 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4527 return false;
4529 else
4530 gcc_assert (is_gimple_call (pattern_stmt));
4533 /* Found a vectorizable pattern. */
4534 if (dump_enabled_p ())
4536 dump_printf_loc (MSG_NOTE, vect_location,
4537 "%s pattern recognized: ", recog_func->name);
4538 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4541 /* Mark the stmts that are involved in the pattern. */
4542 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4544 /* Patterns cannot be vectorized using SLP, because they change the order of
4545 computation. */
4546 if (loop_vinfo)
4547 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4548 if (next == stmt)
4549 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
4551 /* It is possible that additional pattern stmts are created and inserted in
4552 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4553 relevant statements. */
4554 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4555 && (unsigned) i < (stmts_to_replace->length () - 1);
4556 i++)
4558 stmt_info = vinfo_for_stmt (stmt);
4559 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4560 if (dump_enabled_p ())
4562 dump_printf_loc (MSG_NOTE, vect_location,
4563 "additional pattern stmt: ");
4564 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4567 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4570 return true;
4574 /* Function vect_pattern_recog
4576 Input:
4577 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
4578 computation idioms.
4580 Output - for each computation idiom that is detected we create a new stmt
4581 that provides the same functionality and that can be vectorized. We
4582 also record some information in the stmt_vec_info of the relevant
4583 stmts, as explained below:
4585 At the entry to this function we have the following stmts, with the
4586 following initial value in the STMT_VINFO fields:
4588 stmt in_pattern_p related_stmt vec_stmt
4589 S1: a_i = .... - - -
4590 S2: a_2 = ..use(a_i).. - - -
4591 S3: a_1 = ..use(a_2).. - - -
4592 S4: a_0 = ..use(a_1).. - - -
4593 S5: ... = ..use(a_0).. - - -
4595 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4596 represented by a single stmt. We then:
4597 - create a new stmt S6 equivalent to the pattern (the stmt is not
4598 inserted into the code)
4599 - fill in the STMT_VINFO fields as follows:
4601 in_pattern_p related_stmt vec_stmt
4602 S1: a_i = .... - - -
4603 S2: a_2 = ..use(a_i).. - - -
4604 S3: a_1 = ..use(a_2).. - - -
4605 S4: a_0 = ..use(a_1).. true S6 -
4606 '---> S6: a_new = .... - S4 -
4607 S5: ... = ..use(a_0).. - - -
4609 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4610 to each other through the RELATED_STMT field).
4612 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4613 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4614 remain irrelevant unless used by stmts other than S4.
4616 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4617 (because they are marked as irrelevant). It will vectorize S6, and record
4618 a pointer to the new vector stmt VS6 from S6 (as usual).
4619 S4 will be skipped, and S5 will be vectorized as usual:
4621 in_pattern_p related_stmt vec_stmt
4622 S1: a_i = .... - - -
4623 S2: a_2 = ..use(a_i).. - - -
4624 S3: a_1 = ..use(a_2).. - - -
4625 > VS6: va_new = .... - - -
4626 S4: a_0 = ..use(a_1).. true S6 VS6
4627 '---> S6: a_new = .... - S4 VS6
4628 > VS5: ... = ..vuse(va_new).. - - -
4629 S5: ... = ..use(a_0).. - - -
4631 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4632 elsewhere), and we'll end up with:
4634 VS6: va_new = ....
4635 VS5: ... = ..vuse(va_new)..
4637 In case of more than one pattern statement, e.g., widen-mult with
4638 intermediate type:
4640 S1 a_t = ;
4641 S2 a_T = (TYPE) a_t;
4642 '--> S3: a_it = (interm_type) a_t;
4643 S4 prod_T = a_T * CONST;
4644 '--> S5: prod_T' = a_it w* CONST;
4646 there may be other users of a_T outside the pattern. In that case S2 will
4647 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4648 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4649 be recorded in S3. */
4651 void
4652 vect_pattern_recog (vec_info *vinfo)
4654 struct loop *loop;
4655 basic_block *bbs;
4656 unsigned int nbbs;
4657 gimple_stmt_iterator si;
4658 unsigned int i, j;
4659 auto_vec<gimple *, 1> stmts_to_replace;
4660 gimple *stmt;
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_NOTE, vect_location,
4664 "=== vect_pattern_recog ===\n");
4666 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4668 loop = LOOP_VINFO_LOOP (loop_vinfo);
4669 bbs = LOOP_VINFO_BBS (loop_vinfo);
4670 nbbs = loop->num_nodes;
4672 /* Scan through the loop stmts, applying the pattern recognition
4673 functions starting at each stmt visited: */
4674 for (i = 0; i < nbbs; i++)
4676 basic_block bb = bbs[i];
4677 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4679 /* Scan over all generic vect_recog_xxx_pattern functions. */
4680 for (j = 0; j < NUM_PATTERNS; j++)
4681 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4682 &stmts_to_replace))
4683 break;
4687 else
4689 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4690 for (si = bb_vinfo->region_begin;
4691 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4693 if ((stmt = gsi_stmt (si))
4694 && vinfo_for_stmt (stmt)
4695 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4696 continue;
4698 /* Scan over all generic vect_recog_xxx_pattern functions. */
4699 for (j = 0; j < NUM_PATTERNS; j++)
4700 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4701 &stmts_to_replace))
4702 break;