gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Pattern recognition functions */
51 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
52 tree *);
53 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
54 tree *);
55 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
58 tree *);
59 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
61 tree *);
62 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
63 tree *, tree *);
64 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
65 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
66 tree *, tree *);
67 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
68 tree *, tree *);
70 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
71 tree *, tree *);
73 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
74 tree *, tree *);
75 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
76 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
77 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
78 tree *);
80 struct vect_recog_func
82 vect_recog_func_ptr fn;
83 const char *name;
86 /* Note that ordering matters - the first pattern matching on a stmt
87 is taken, which means the more complex pattern usually needs to precede
88 the less complex one (widen_sum only after dot_prod or sad, for example). */
89 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
90 { vect_recog_widen_mult_pattern, "widen_mult" },
91 { vect_recog_dot_prod_pattern, "dot_prod" },
92 { vect_recog_sad_pattern, "sad" },
93 { vect_recog_widen_sum_pattern, "widen_sum" },
94 { vect_recog_pow_pattern, "pow" },
95 { vect_recog_widen_shift_pattern, "widen_shift" },
96 { vect_recog_over_widening_pattern, "over_widening" },
97 { vect_recog_rotate_pattern, "rotate" },
98 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
99 { vect_recog_divmod_pattern, "divmod" },
100 { vect_recog_mult_pattern, "mult" },
101 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
102 { vect_recog_bool_pattern, "bool" },
103 /* This must come before mask conversion, and includes the parts
104 of mask conversion that are needed for gather and scatter
105 internal functions. */
106 { vect_recog_gather_scatter_pattern, "gather_scatter" },
107 { vect_recog_mask_conversion_pattern, "mask_conversion" }
110 static inline void
111 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
113 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
114 stmt);
117 static inline void
118 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
120 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
121 append_pattern_def_seq (stmt_info, stmt);
124 /* Check whether STMT2 is in the same loop or basic block as STMT1.
125 Which of the two applies depends on whether we're currently doing
126 loop-based or basic-block-based vectorization, as determined by
127 the vinfo_for_stmt for STMT1 (which must be defined).
129 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
130 to be defined as well. */
132 static bool
133 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
135 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
136 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
139 /* If the LHS of DEF_STMT has a single use, and that statement is
140 in the same loop or basic block, return it. */
142 static gimple *
143 vect_single_imm_use (gimple *def_stmt)
145 tree lhs = gimple_assign_lhs (def_stmt);
146 use_operand_p use_p;
147 gimple *use_stmt;
149 if (!single_imm_use (lhs, &use_p, &use_stmt))
150 return NULL;
152 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
153 return NULL;
155 return use_stmt;
158 /* Check whether NAME, an ssa-name used in USE_STMT,
159 is a result of a type promotion, such that:
160 DEF_STMT: NAME = NOP (name0)
161 If CHECK_SIGN is TRUE, check that either both types are signed or both are
162 unsigned. */
164 static bool
165 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
166 tree *orig_type, gimple **def_stmt, bool *promotion)
168 gimple *dummy_gimple;
169 stmt_vec_info stmt_vinfo;
170 tree type = TREE_TYPE (name);
171 tree oprnd0;
172 enum vect_def_type dt;
174 stmt_vinfo = vinfo_for_stmt (use_stmt);
175 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
176 return false;
178 if (dt != vect_internal_def
179 && dt != vect_external_def && dt != vect_constant_def)
180 return false;
182 if (!*def_stmt)
183 return false;
185 if (dt == vect_internal_def)
187 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
188 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
189 return false;
192 if (!is_gimple_assign (*def_stmt))
193 return false;
195 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
196 return false;
198 oprnd0 = gimple_assign_rhs1 (*def_stmt);
200 *orig_type = TREE_TYPE (oprnd0);
201 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
202 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
203 return false;
205 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
206 *promotion = true;
207 else
208 *promotion = false;
210 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
211 return false;
213 return true;
216 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
217 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
219 static tree
220 vect_recog_temp_ssa_var (tree type, gimple *stmt)
222 return make_temp_ssa_name (type, stmt, "patt");
225 /* Return true if STMT_VINFO describes a reduction for which reassociation
226 is allowed. If STMT_INFO is part of a group, assume that it's part of
227 a reduction chain and optimistically assume that all statements
228 except the last allow reassociation. */
230 static bool
231 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
233 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
234 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
235 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
238 /* Function vect_recog_dot_prod_pattern
240 Try to find the following pattern:
242 type x_t, y_t;
243 TYPE1 prod;
244 TYPE2 sum = init;
245 loop:
246 sum_0 = phi <init, sum_1>
247 S1 x_t = ...
248 S2 y_t = ...
249 S3 x_T = (TYPE1) x_t;
250 S4 y_T = (TYPE1) y_t;
251 S5 prod = x_T * y_T;
252 [S6 prod = (TYPE2) prod; #optional]
253 S7 sum_1 = prod + sum_0;
255 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
256 same size as 'TYPE1' or bigger. This is a special case of a reduction
257 computation.
259 Input:
261 * STMTS: Contains a stmt from which the pattern search begins. In the
262 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
263 will be detected.
265 Output:
267 * TYPE_IN: The type of the input arguments to the pattern.
269 * TYPE_OUT: The type of the output of this pattern.
271 * Return value: A new stmt that will be used to replace the sequence of
272 stmts that constitute the pattern. In this case it will be:
273 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
275 Note: The dot-prod idiom is a widening reduction pattern that is
276 vectorized without preserving all the intermediate results. It
277 produces only N/2 (widened) results (by summing up pairs of
278 intermediate results) rather than all N results. Therefore, we
279 cannot allow this pattern when we want to get all the results and in
280 the correct order (as is the case when this computation is in an
281 inner-loop nested in an outer-loop that is being vectorized). */
283 static gimple *
284 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
285 tree *type_out)
287 gimple *stmt, *last_stmt = (*stmts)[0];
288 tree oprnd0, oprnd1;
289 tree oprnd00, oprnd01;
290 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
291 tree type, half_type;
292 gimple *pattern_stmt;
293 tree prod_type;
294 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
295 struct loop *loop;
296 tree var;
297 bool promotion;
299 if (!loop_info)
300 return NULL;
302 loop = LOOP_VINFO_LOOP (loop_info);
304 /* We don't allow changing the order of the computation in the inner-loop
305 when doing outer-loop vectorization. */
306 if (loop && nested_in_vect_loop_p (loop, last_stmt))
307 return NULL;
309 if (!is_gimple_assign (last_stmt))
310 return NULL;
312 type = gimple_expr_type (last_stmt);
314 /* Look for the following pattern
315 DX = (TYPE1) X;
316 DY = (TYPE1) Y;
317 DPROD = DX * DY;
318 DDPROD = (TYPE2) DPROD;
319 sum_1 = DDPROD + sum_0;
320 In which
321 - DX is double the size of X
322 - DY is double the size of Y
323 - DX, DY, DPROD all have the same type
324 - sum is the same size as DPROD or bigger
325 - sum has been recognized as a reduction variable.
327 This is equivalent to:
328 DPROD = X w* Y; #widen mult
329 sum_1 = DPROD w+ sum_0; #widen summation
331 DPROD = X w* Y; #widen mult
332 sum_1 = DPROD + sum_0; #summation
335 /* Starting from LAST_STMT, follow the defs of its uses in search
336 of the above pattern. */
338 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
339 return NULL;
341 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
343 /* Has been detected as widening-summation? */
345 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
346 type = gimple_expr_type (stmt);
347 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
348 return NULL;
349 oprnd0 = gimple_assign_rhs1 (stmt);
350 oprnd1 = gimple_assign_rhs2 (stmt);
351 half_type = TREE_TYPE (oprnd0);
353 else
355 gimple *def_stmt;
357 if (!vect_reassociating_reduction_p (stmt_vinfo))
358 return NULL;
359 oprnd0 = gimple_assign_rhs1 (last_stmt);
360 oprnd1 = gimple_assign_rhs2 (last_stmt);
361 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
362 || !types_compatible_p (TREE_TYPE (oprnd1), type))
363 return NULL;
364 stmt = last_stmt;
366 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
367 &promotion)
368 && promotion)
370 stmt = def_stmt;
371 oprnd0 = gimple_assign_rhs1 (stmt);
373 else
374 half_type = type;
377 /* So far so good. Since last_stmt was detected as a (summation) reduction,
378 we know that oprnd1 is the reduction variable (defined by a loop-header
379 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
380 Left to check that oprnd0 is defined by a (widen_)mult_expr */
381 if (TREE_CODE (oprnd0) != SSA_NAME)
382 return NULL;
384 prod_type = half_type;
385 stmt = SSA_NAME_DEF_STMT (oprnd0);
387 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
388 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
389 return NULL;
391 /* FORNOW. Cannot continue analyzing the def-use chain when this stmt is a phi
392 inside the loop (in case we are analyzing an outer-loop). */
393 if (!is_gimple_assign (stmt))
394 return NULL;
395 stmt_vinfo = vinfo_for_stmt (stmt);
396 gcc_assert (stmt_vinfo);
397 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
398 return NULL;
399 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
400 return NULL;
401 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
403 /* Has been detected as a widening multiplication? */
405 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
406 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
407 return NULL;
408 stmt_vinfo = vinfo_for_stmt (stmt);
409 gcc_assert (stmt_vinfo);
410 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
411 oprnd00 = gimple_assign_rhs1 (stmt);
412 oprnd01 = gimple_assign_rhs2 (stmt);
413 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
414 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
416 else
418 tree half_type0, half_type1;
419 gimple *def_stmt;
420 tree oprnd0, oprnd1;
422 oprnd0 = gimple_assign_rhs1 (stmt);
423 oprnd1 = gimple_assign_rhs2 (stmt);
424 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
425 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
426 return NULL;
427 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
428 &promotion)
429 || !promotion)
430 return NULL;
431 oprnd00 = gimple_assign_rhs1 (def_stmt);
432 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
433 &promotion)
434 || !promotion)
435 return NULL;
436 oprnd01 = gimple_assign_rhs1 (def_stmt);
437 if (!types_compatible_p (half_type0, half_type1))
438 return NULL;
439 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
440 return NULL;
443 half_type = TREE_TYPE (oprnd00);
444 *type_in = half_type;
445 *type_out = type;
447 /* Pattern detected. Create a stmt to be used to replace the pattern: */
448 var = vect_recog_temp_ssa_var (type, NULL);
449 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
450 oprnd00, oprnd01, oprnd1);
452 if (dump_enabled_p ())
454 dump_printf_loc (MSG_NOTE, vect_location,
455 "vect_recog_dot_prod_pattern: detected: ");
456 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
459 return pattern_stmt;
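
/* Illustrative addition, not part of the original file: a minimal C
   source loop of the rough shape vect_recog_dot_prod_pattern looks for.
   The function name and the choice of signed char / short / int for
   'type' / TYPE1 / TYPE2 are assumptions made for this example only.  */

static int
example_dot_prod (const signed char *x, const signed char *y, int n)
{
  int sum = 0;                                  /* TYPE2 sum = init;  */
  for (int i = 0; i < n; i++)
    {
      /* S3-S5: both operands widened to TYPE1, then multiplied.  */
      short prod = (short) x[i] * (short) y[i];
      /* [S6] + S7: product (possibly widened again) added to the
         reduction variable.  */
      sum += prod;
    }
  return sum;
}
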
463 /* Function vect_recog_sad_pattern
465 Try to find the following Sum of Absolute Difference (SAD) pattern:
467 type x_t, y_t;
468 signed TYPE1 diff, abs_diff;
469 TYPE2 sum = init;
470 loop:
471 sum_0 = phi <init, sum_1>
472 S1 x_t = ...
473 S2 y_t = ...
474 S3 x_T = (TYPE1) x_t;
475 S4 y_T = (TYPE1) y_t;
476 S5 diff = x_T - y_T;
477 S6 abs_diff = ABS_EXPR <diff>;
478 [S7 abs_diff = (TYPE2) abs_diff; #optional]
479 S8 sum_1 = abs_diff + sum_0;
481 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
482 same size as 'TYPE1' or bigger. This is a special case of a reduction
483 computation.
485 Input:
487 * STMTS: Contains a stmt from which the pattern search begins. In the
488 example, when this function is called with S8, the pattern
489 {S3,S4,S5,S6,S7,S8} will be detected.
491 Output:
493 * TYPE_IN: The type of the input arguments to the pattern.
495 * TYPE_OUT: The type of the output of this pattern.
497 * Return value: A new stmt that will be used to replace the sequence of
498 stmts that constitute the pattern. In this case it will be:
499 SAD_EXPR <x_t, y_t, sum_0>
502 static gimple *
503 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
504 tree *type_out)
506 gimple *last_stmt = (*stmts)[0];
507 tree sad_oprnd0, sad_oprnd1;
508 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
509 tree half_type;
510 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
511 struct loop *loop;
512 bool promotion;
514 if (!loop_info)
515 return NULL;
517 loop = LOOP_VINFO_LOOP (loop_info);
519 /* We don't allow changing the order of the computation in the inner-loop
520 when doing outer-loop vectorization. */
521 if (loop && nested_in_vect_loop_p (loop, last_stmt))
522 return NULL;
524 if (!is_gimple_assign (last_stmt))
525 return NULL;
527 tree sum_type = gimple_expr_type (last_stmt);
529 /* Look for the following pattern
530 DX = (TYPE1) X;
531 DY = (TYPE1) Y;
532 DDIFF = DX - DY;
533 DAD = ABS_EXPR <DDIFF>;
534 DAD = (TYPE2) DAD; #optional widening
535 sum_1 = DAD + sum_0;
536 In which
537 - DX is at least double the size of X
538 - DY is at least double the size of Y
539 - DX, DY, DDIFF, DAD all have the same type
540 - sum is the same size as DAD or bigger
541 - sum has been recognized as a reduction variable.
543 This is equivalent to:
544 DDIFF = X w- Y; #widen sub
545 DAD = ABS_EXPR <DDIFF>;
546 sum_1 = DAD w+ sum_0; #widen summation
548 DDIFF = X w- Y; #widen sub
549 DAD = ABS_EXPR <DDIFF>;
550 sum_1 = DAD + sum_0; #summation
553 /* Starting from LAST_STMT, follow the defs of its uses in search
554 of the above pattern. */
556 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
557 return NULL;
559 tree plus_oprnd0, plus_oprnd1;
561 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
563 /* Has been detected as widening-summation? */
565 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
566 sum_type = gimple_expr_type (stmt);
567 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
568 return NULL;
569 plus_oprnd0 = gimple_assign_rhs1 (stmt);
570 plus_oprnd1 = gimple_assign_rhs2 (stmt);
571 half_type = TREE_TYPE (plus_oprnd0);
573 else
575 gimple *def_stmt;
577 if (!vect_reassociating_reduction_p (stmt_vinfo))
578 return NULL;
579 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
580 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
581 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
582 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
583 return NULL;
585 /* The type conversion could be promotion, demotion,
586 or just signed -> unsigned. */
587 if (type_conversion_p (plus_oprnd0, last_stmt, false,
588 &half_type, &def_stmt, &promotion))
589 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
590 else
591 half_type = sum_type;
594 /* So far so good. Since last_stmt was detected as a (summation) reduction,
595 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
596 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
597 Then check that plus_oprnd0 is defined by an abs_expr. */
599 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
600 return NULL;
602 tree abs_type = half_type;
603 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
605 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
606 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
607 return NULL;
609 /* FORNOW. Cannot continue analyzing the def-use chain when this stmt is a phi
610 inside the loop (in case we are analyzing an outer-loop). */
611 if (!is_gimple_assign (abs_stmt))
612 return NULL;
614 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
615 gcc_assert (abs_stmt_vinfo);
616 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
617 return NULL;
618 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
619 return NULL;
621 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
622 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
623 return NULL;
624 if (TYPE_UNSIGNED (abs_type))
625 return NULL;
627 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
629 if (TREE_CODE (abs_oprnd) != SSA_NAME)
630 return NULL;
632 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
634 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
635 if (!gimple_bb (diff_stmt)
636 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
637 return NULL;
639 /* FORNOW. Cannot continue analyzing the def-use chain when this stmt is a phi
640 inside the loop (in case we are analyzing an outer-loop). */
641 if (!is_gimple_assign (diff_stmt))
642 return NULL;
644 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
645 gcc_assert (diff_stmt_vinfo);
646 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
647 return NULL;
648 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
649 return NULL;
651 tree half_type0, half_type1;
652 gimple *def_stmt;
654 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
655 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
657 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
658 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
659 return NULL;
660 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
661 &half_type0, &def_stmt, &promotion)
662 || !promotion)
663 return NULL;
664 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
666 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
667 &half_type1, &def_stmt, &promotion)
668 || !promotion)
669 return NULL;
670 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
672 if (!types_compatible_p (half_type0, half_type1))
673 return NULL;
674 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
675 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
676 return NULL;
678 *type_in = TREE_TYPE (sad_oprnd0);
679 *type_out = sum_type;
681 /* Pattern detected. Create a stmt to be used to replace the pattern: */
682 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
683 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
684 sad_oprnd1, plus_oprnd1);
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_NOTE, vect_location,
689 "vect_recog_sad_pattern: detected: ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
693 return pattern_stmt;
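
/* Illustrative addition, not part of the original file: a minimal C
   source loop of the rough shape vect_recog_sad_pattern looks for.
   The function name and the choice of unsigned char / int for
   'type' / TYPE1 and TYPE2 are assumptions made for this example only.  */

static int
example_sad (const unsigned char *x, const unsigned char *y, int n)
{
  int sum = 0;                          /* TYPE2 sum = init;  */
  for (int i = 0; i < n; i++)
    {
      /* S3-S5: operands widened (here by the usual C promotions),
         then subtracted.  */
      int diff = x[i] - y[i];
      /* S6-S8: absolute difference accumulated into the reduction.  */
      sum += diff >= 0 ? diff : -diff;
    }
  return sum;
}
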
697 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
698 and LSHIFT_EXPR.
700 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
701 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
703 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
704 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
705 that satisfies the above restrictions, we can perform a widening operation
706 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
707 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
709 static bool
710 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
711 tree const_oprnd, tree *oprnd,
712 gimple **wstmt, tree type,
713 tree *half_type, gimple *def_stmt)
715 tree new_type, new_oprnd;
717 if (code != MULT_EXPR && code != LSHIFT_EXPR)
718 return false;
720 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
721 || (code == LSHIFT_EXPR
722 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
723 != 1))
724 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
726 /* CONST_OPRND is a constant of HALF_TYPE. */
727 *oprnd = gimple_assign_rhs1 (def_stmt);
728 return true;
731 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
732 return false;
734 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
735 return false;
737 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
738 a type 2 times bigger than HALF_TYPE. */
739 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
740 TYPE_UNSIGNED (type));
741 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
742 || (code == LSHIFT_EXPR
743 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
744 return false;
746 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
747 *oprnd = gimple_assign_rhs1 (def_stmt);
748 new_oprnd = make_ssa_name (new_type);
749 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
750 *oprnd = new_oprnd;
752 *half_type = new_type;
753 return true;
757 /* Function vect_recog_widen_mult_pattern
759 Try to find the following pattern:
761 type1 a_t;
762 type2 b_t;
763 TYPE a_T, b_T, prod_T;
765 S1 a_t = ;
766 S2 b_t = ;
767 S3 a_T = (TYPE) a_t;
768 S4 b_T = (TYPE) b_t;
769 S5 prod_T = a_T * b_T;
771 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
773 Also detect unsigned cases:
775 unsigned type1 a_t;
776 unsigned type2 b_t;
777 unsigned TYPE u_prod_T;
778 TYPE a_T, b_T, prod_T;
780 S1 a_t = ;
781 S2 b_t = ;
782 S3 a_T = (TYPE) a_t;
783 S4 b_T = (TYPE) b_t;
784 S5 prod_T = a_T * b_T;
785 S6 u_prod_T = (unsigned TYPE) prod_T;
787 and multiplication by constants:
789 type a_t;
790 TYPE a_T, prod_T;
792 S1 a_t = ;
793 S3 a_T = (TYPE) a_t;
794 S5 prod_T = a_T * CONST;
796 A special case of multiplication by constants is when 'TYPE' is 4 times
797 bigger than 'type', but CONST fits an intermediate type 2 times smaller
798 than 'TYPE'. In that case we create an additional pattern stmt for S3
799 to create a variable of the intermediate type, and perform widen-mult
800 on the intermediate type as well:
802 type a_t;
803 interm_type a_it;
804 TYPE a_T, prod_T, prod_T';
806 S1 a_t = ;
807 S3 a_T = (TYPE) a_t;
808 '--> a_it = (interm_type) a_t;
809 S5 prod_T = a_T * CONST;
810 '--> prod_T' = a_it w* CONST;
812 Input/Output:
814 * STMTS: Contains a stmt from which the pattern search begins. In the
815 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
816 is detected. In case of unsigned widen-mult, the original stmt (S5) is
817 replaced with S6 in STMTS. In case of multiplication by a constant
818 of an intermediate type (the last case above), STMTS also contains S3
819 (inserted before S5).
821 Output:
823 * TYPE_IN: The type of the input arguments to the pattern.
825 * TYPE_OUT: The type of the output of this pattern.
827 * Return value: A new stmt that will be used to replace the sequence of
828 stmts that constitute the pattern. In this case it will be:
829 WIDEN_MULT <a_t, b_t>
830 If the result of WIDEN_MULT needs to be converted to a larger type, the
831 returned stmt will be this type conversion stmt.
834 static gimple *
835 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
836 tree *type_in, tree *type_out)
838 gimple *last_stmt = stmts->pop ();
839 gimple *def_stmt0, *def_stmt1;
840 tree oprnd0, oprnd1;
841 tree type, half_type0, half_type1;
842 gimple *new_stmt = NULL, *pattern_stmt = NULL;
843 tree vectype, vecitype;
844 tree var;
845 enum tree_code dummy_code;
846 int dummy_int;
847 vec<tree> dummy_vec;
848 bool op1_ok;
849 bool promotion;
851 if (!is_gimple_assign (last_stmt))
852 return NULL;
854 type = gimple_expr_type (last_stmt);
856 /* Starting from LAST_STMT, follow the defs of its uses in search
857 of the above pattern. */
859 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
860 return NULL;
862 oprnd0 = gimple_assign_rhs1 (last_stmt);
863 oprnd1 = gimple_assign_rhs2 (last_stmt);
864 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
865 || !types_compatible_p (TREE_TYPE (oprnd1), type))
866 return NULL;
868 /* Check argument 0. */
869 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
870 &promotion)
871 || !promotion)
872 return NULL;
873 /* Check argument 1. */
874 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
875 &def_stmt1, &promotion);
877 if (op1_ok && promotion)
879 oprnd0 = gimple_assign_rhs1 (def_stmt0);
880 oprnd1 = gimple_assign_rhs1 (def_stmt1);
882 else
884 if (TREE_CODE (oprnd1) == INTEGER_CST
885 && TREE_CODE (half_type0) == INTEGER_TYPE
886 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
887 &oprnd0, &new_stmt, type,
888 &half_type0, def_stmt0))
890 half_type1 = half_type0;
891 oprnd1 = fold_convert (half_type1, oprnd1);
893 else
894 return NULL;
897 /* If the two arguments have different sizes, convert the one with
898 the smaller type into the larger type. */
899 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
901 /* If we already used up the single-stmt slot give up. */
902 if (new_stmt)
903 return NULL;
905 tree* oprnd = NULL;
906 gimple *def_stmt = NULL;
908 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
910 def_stmt = def_stmt0;
911 half_type0 = half_type1;
912 oprnd = &oprnd0;
914 else
916 def_stmt = def_stmt1;
917 half_type1 = half_type0;
918 oprnd = &oprnd1;
921 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
922 tree new_oprnd = make_ssa_name (half_type0);
923 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
924 *oprnd = new_oprnd;
927 /* Handle unsigned case. Look for
928 S6 u_prod_T = (unsigned TYPE) prod_T;
929 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
930 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
932 gimple *use_stmt;
933 tree use_lhs;
934 tree use_type;
936 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
937 return NULL;
939 use_stmt = vect_single_imm_use (last_stmt);
940 if (!use_stmt || !is_gimple_assign (use_stmt)
941 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
942 return NULL;
944 use_lhs = gimple_assign_lhs (use_stmt);
945 use_type = TREE_TYPE (use_lhs);
946 if (!INTEGRAL_TYPE_P (use_type)
947 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
948 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
949 return NULL;
951 type = use_type;
952 last_stmt = use_stmt;
955 if (!types_compatible_p (half_type0, half_type1))
956 return NULL;
958 /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
959 to get an intermediate result of type ITYPE. In this case we need
960 to build a statement to convert this intermediate result to type TYPE. */
961 tree itype = type;
962 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
963 itype = build_nonstandard_integer_type
964 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
965 TYPE_UNSIGNED (type));
967 /* Pattern detected. */
968 if (dump_enabled_p ())
969 dump_printf_loc (MSG_NOTE, vect_location,
970 "vect_recog_widen_mult_pattern: detected:\n");
972 /* Check target support */
973 vectype = get_vectype_for_scalar_type (half_type0);
974 vecitype = get_vectype_for_scalar_type (itype);
975 if (!vectype
976 || !vecitype
977 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
978 vecitype, vectype,
979 &dummy_code, &dummy_code,
980 &dummy_int, &dummy_vec))
981 return NULL;
983 *type_in = vectype;
984 *type_out = get_vectype_for_scalar_type (type);
986 /* Pattern supported. Create a stmt to be used to replace the pattern: */
987 var = vect_recog_temp_ssa_var (itype, NULL);
988 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
990 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
991 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
993 /* If the original two operands have different sizes, we may need to convert
994 the smaller one into the larger type. If this is the case, at this point
995 the new stmt is already built. */
996 if (new_stmt)
998 append_pattern_def_seq (stmt_vinfo, new_stmt);
999 stmt_vec_info new_stmt_info
1000 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
1001 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1002 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1005 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
1006 the result of the widen-mult operation into type TYPE. */
1007 if (itype != type)
1009 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1010 stmt_vec_info pattern_stmt_info
1011 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
1012 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1013 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1014 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1015 NOP_EXPR,
1016 gimple_assign_lhs (pattern_stmt));
1019 if (dump_enabled_p ())
1020 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1022 stmts->safe_push (last_stmt);
1023 return pattern_stmt;
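
/* Illustrative addition, not part of the original file: a minimal C
   source loop of the rough shape vect_recog_widen_mult_pattern looks
   for.  The function name and the choice of short / int for
   type1,type2 / TYPE are assumptions made for this example only.  */

static void
example_widen_mult (int *res, const short *a, const short *b, int n)
{
  for (int i = 0; i < n; i++)
    /* S3-S5: both operands widened to TYPE, then multiplied; the
       recognizer replaces this with a single widening multiply on
       the narrow type.  */
    res[i] = (int) a[i] * (int) b[i];
}
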
1027 /* Function vect_recog_pow_pattern
1029 Try to find the following pattern:
1031 x = POW (y, N);
1033 with POW being one of pow, powf, powi, powif and N being
1034 either 2 or 0.5.
1036 Input:
1038 * LAST_STMT: A stmt from which the pattern search begins.
1040 Output:
1042 * TYPE_IN: The type of the input arguments to the pattern.
1044 * TYPE_OUT: The type of the output of this pattern.
1046 * Return value: A new stmt that will be used to replace the sequence of
1047 stmts that constitute the pattern. In this case it will be:
1048 x = x * x
1050 x = sqrt (x)
1053 static gimple *
1054 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1055 tree *type_out)
1057 gimple *last_stmt = (*stmts)[0];
1058 tree base, exp;
1059 gimple *stmt;
1060 tree var;
1062 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1063 return NULL;
1065 switch (gimple_call_combined_fn (last_stmt))
1067 CASE_CFN_POW:
1068 CASE_CFN_POWI:
1069 break;
1071 default:
1072 return NULL;
1075 base = gimple_call_arg (last_stmt, 0);
1076 exp = gimple_call_arg (last_stmt, 1);
1077 if (TREE_CODE (exp) != REAL_CST
1078 && TREE_CODE (exp) != INTEGER_CST)
1080 if (flag_unsafe_math_optimizations
1081 && TREE_CODE (base) == REAL_CST
1082 && !gimple_call_internal_p (last_stmt))
1084 combined_fn log_cfn;
1085 built_in_function exp_bfn;
1086 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1088 case BUILT_IN_POW:
1089 log_cfn = CFN_BUILT_IN_LOG;
1090 exp_bfn = BUILT_IN_EXP;
1091 break;
1092 case BUILT_IN_POWF:
1093 log_cfn = CFN_BUILT_IN_LOGF;
1094 exp_bfn = BUILT_IN_EXPF;
1095 break;
1096 case BUILT_IN_POWL:
1097 log_cfn = CFN_BUILT_IN_LOGL;
1098 exp_bfn = BUILT_IN_EXPL;
1099 break;
1100 default:
1101 return NULL;
1103 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1104 tree exp_decl = builtin_decl_implicit (exp_bfn);
1105 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1106 does that, but if C is a power of 2, we want to use
1107 exp2 (log2 (C) * x) in the non-vectorized version, but for
1108 vectorization we don't have vectorized exp2. */
1109 if (logc
1110 && TREE_CODE (logc) == REAL_CST
1111 && exp_decl
1112 && lookup_attribute ("omp declare simd",
1113 DECL_ATTRIBUTES (exp_decl)))
1115 cgraph_node *node = cgraph_node::get_create (exp_decl);
1116 if (node->simd_clones == NULL)
1118 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1119 || node->definition)
1120 return NULL;
1121 expand_simd_clones (node);
1122 if (node->simd_clones == NULL)
1123 return NULL;
1125 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1126 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1127 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1128 new_pattern_def_seq (stmt_vinfo, g);
1129 *type_in = TREE_TYPE (base);
1130 *type_out = NULL_TREE;
1131 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1132 g = gimple_build_call (exp_decl, 1, def);
1133 gimple_call_set_lhs (g, res);
1134 return g;
1138 return NULL;
1141 /* We now have a pow or powi builtin function call with a constant
1142 exponent. */
1144 *type_out = NULL_TREE;
1146 /* Catch squaring. */
1147 if ((tree_fits_shwi_p (exp)
1148 && tree_to_shwi (exp) == 2)
1149 || (TREE_CODE (exp) == REAL_CST
1150 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1152 *type_in = TREE_TYPE (base);
1154 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1155 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1156 return stmt;
1159 /* Catch square root. */
1160 if (TREE_CODE (exp) == REAL_CST
1161 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1163 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1164 if (*type_in
1165 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1166 OPTIMIZE_FOR_SPEED))
1168 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1169 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1170 gimple_call_set_lhs (stmt, var);
1171 gimple_call_set_nothrow (stmt, true);
1172 return stmt;
1176 return NULL;
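
/* Illustrative addition, not part of the original file: calls of the
   rough shape vect_recog_pow_pattern rewrites.  The function name is an
   assumption; __builtin_pow is used so the example needs no header.  */

static void
example_pow (double *out, const double *in, int n)
{
  for (int i = 0; i < n; i++)
    /* pow (x, 2.0) is rewritten as x * x; pow (x, 0.5) would instead
       become a (vectorizable) square root.  */
    out[i] = __builtin_pow (in[i], 2.0);
}
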
1180 /* Function vect_recog_widen_sum_pattern
1182 Try to find the following pattern:
1184 type x_t;
1185 TYPE x_T, sum = init;
1186 loop:
1187 sum_0 = phi <init, sum_1>
1188 S1 x_t = *p;
1189 S2 x_T = (TYPE) x_t;
1190 S3 sum_1 = x_T + sum_0;
1192 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1193 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1194 a special case of a reduction computation.
1196 Input:
1198 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1199 when this function is called with S3, the pattern {S2,S3} will be detected.
1201 Output:
1203 * TYPE_IN: The type of the input arguments to the pattern.
1205 * TYPE_OUT: The type of the output of this pattern.
1207 * Return value: A new stmt that will be used to replace the sequence of
1208 stmts that constitute the pattern. In this case it will be:
1209 WIDEN_SUM <x_t, sum_0>
1211 Note: The widening-sum idiom is a widening reduction pattern that is
1212 vectorized without preserving all the intermediate results. It
1213 produces only N/2 (widened) results (by summing up pairs of
1214 intermediate results) rather than all N results. Therefore, we
1215 cannot allow this pattern when we want to get all the results and in
1216 the correct order (as is the case when this computation is in an
1217 inner-loop nested in an outer-loop that is being vectorized). */
1219 static gimple *
1220 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1221 tree *type_out)
1223 gimple *stmt, *last_stmt = (*stmts)[0];
1224 tree oprnd0, oprnd1;
1225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1226 tree type, half_type;
1227 gimple *pattern_stmt;
1228 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1229 struct loop *loop;
1230 tree var;
1231 bool promotion;
1233 if (!loop_info)
1234 return NULL;
1236 loop = LOOP_VINFO_LOOP (loop_info);
1238 /* We don't allow changing the order of the computation in the inner-loop
1239 when doing outer-loop vectorization. */
1240 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1241 return NULL;
1243 if (!is_gimple_assign (last_stmt))
1244 return NULL;
1246 type = gimple_expr_type (last_stmt);
1248 /* Look for the following pattern
1249 DX = (TYPE) X;
1250 sum_1 = DX + sum_0;
1251 In which DX is at least double the size of X, and sum_1 has been
1252 recognized as a reduction variable.
1255 /* Starting from LAST_STMT, follow the defs of its uses in search
1256 of the above pattern. */
1258 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1259 return NULL;
1261 if (!vect_reassociating_reduction_p (stmt_vinfo))
1262 return NULL;
1264 oprnd0 = gimple_assign_rhs1 (last_stmt);
1265 oprnd1 = gimple_assign_rhs2 (last_stmt);
1266 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1267 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1268 return NULL;
1270 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1271 we know that oprnd1 is the reduction variable (defined by a loop-header
1272 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1273 Left to check that oprnd0 is defined by a cast from type 'type' to type
1274 'TYPE'. */
1276 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1277 &promotion)
1278 || !promotion)
1279 return NULL;
1281 oprnd0 = gimple_assign_rhs1 (stmt);
1282 *type_in = half_type;
1283 *type_out = type;
1285 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1286 var = vect_recog_temp_ssa_var (type, NULL);
1287 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1289 if (dump_enabled_p ())
1291 dump_printf_loc (MSG_NOTE, vect_location,
1292 "vect_recog_widen_sum_pattern: detected: ");
1293 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1296 return pattern_stmt;
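
/* Illustrative addition, not part of the original file: a minimal C
   source loop of the rough shape vect_recog_widen_sum_pattern looks
   for.  The function name and the choice of unsigned short /
   unsigned int for 'type' / TYPE are assumptions for this example.  */

static unsigned int
example_widen_sum (const unsigned short *x, int n)
{
  unsigned int sum = 0;         /* TYPE sum = init;  */
  for (int i = 0; i < n; i++)
    /* S2 + S3: each element widened to TYPE and added to the
       reduction variable; replaced by WIDEN_SUM <x_t, sum_0>.  */
    sum += x[i];
  return sum;
}
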
1300 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1302 Input:
1303 STMT - a statement to check.
1304 DEF - we support operations with two operands, one of which is constant.
1305 The other operand can be defined by a demotion operation, or by a
1306 previous statement in a sequence of over-promoted operations. In the
1307 latter case DEF is used to replace that operand. (It is defined by a
1308 pattern statement we created for the previous statement in the
1309 sequence).
1311 Input/output:
1312 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1313 NULL, it's the type of DEF.
1314 STMTS - additional pattern statements. If a pattern statement (type
1315 conversion) is created in this function, its original statement is
1316 added to STMTS.
1318 Output:
1319 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1320 operands to use in the new pattern statement for STMT (will be created
1321 in vect_recog_over_widening_pattern ()).
1322 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1323 statements for STMT: the first one is a type promotion and the second
1324 one is the operation itself. We return the type promotion statement
1325 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1326 the second pattern statement. */
1328 static bool
1329 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1330 tree *op0, tree *op1, gimple **new_def_stmt,
1331 vec<gimple *> *stmts)
1333 enum tree_code code;
1334 tree const_oprnd, oprnd;
1335 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1336 gimple *def_stmt, *new_stmt;
1337 bool first = false;
1338 bool promotion;
1340 *op0 = NULL_TREE;
1341 *op1 = NULL_TREE;
1342 *new_def_stmt = NULL;
1344 if (!is_gimple_assign (stmt))
1345 return false;
1347 code = gimple_assign_rhs_code (stmt);
1348 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1349 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1350 return false;
1352 oprnd = gimple_assign_rhs1 (stmt);
1353 const_oprnd = gimple_assign_rhs2 (stmt);
1354 type = gimple_expr_type (stmt);
1356 if (TREE_CODE (oprnd) != SSA_NAME
1357 || TREE_CODE (const_oprnd) != INTEGER_CST)
1358 return false;
1360 /* If oprnd has other uses besides that in stmt we cannot mark it
1361 as being part of a pattern only. */
1362 if (!has_single_use (oprnd))
1363 return false;
1365 /* If we are in the middle of a sequence, we use DEF from a previous
1366 statement. Otherwise, OPRND has to be a result of type promotion. */
1367 if (*new_type)
1369 half_type = *new_type;
1370 oprnd = def;
1372 else
1374 first = true;
1375 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1376 &promotion)
1377 || !promotion
1378 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1379 return false;
1382 /* Can we perform the operation on a smaller type? */
1383 switch (code)
1385 case BIT_IOR_EXPR:
1386 case BIT_XOR_EXPR:
1387 case BIT_AND_EXPR:
1388 if (!int_fits_type_p (const_oprnd, half_type))
1390 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1391 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1392 return false;
1394 interm_type = build_nonstandard_integer_type (
1395 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1396 if (!int_fits_type_p (const_oprnd, interm_type))
1397 return false;
1400 break;
1402 case LSHIFT_EXPR:
1403 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1404 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1405 return false;
1407 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1408 (e.g., if the original value was char, the shift amount is at most 8
1409 if we want to use short). */
1410 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1411 return false;
1413 interm_type = build_nonstandard_integer_type (
1414 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1416 if (!vect_supportable_shift (code, interm_type))
1417 return false;
1419 break;
1421 case RSHIFT_EXPR:
1422 if (vect_supportable_shift (code, half_type))
1423 break;
1425 /* Try intermediate type - HALF_TYPE is not supported. */
1426 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1427 return false;
1429 interm_type = build_nonstandard_integer_type (
1430 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1432 if (!vect_supportable_shift (code, interm_type))
1433 return false;
1435 break;
1437 default:
1438 gcc_unreachable ();
1441 /* There are four possible cases:
1442 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1443 the first statement in the sequence)
1444 a. The original, HALF_TYPE, is not enough - we replace the promotion
1445 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1446 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1447 promotion.
1448 2. OPRND is defined by a pattern statement we created.
1449 a. Its type is not sufficient for the operation, we create a new stmt:
1450 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1451 this statement in NEW_DEF_STMT, and it is later put in
1452 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1453 b. OPRND is good to use in the new statement. */
1454 if (first)
1456 if (interm_type)
1458 /* Replace the original type conversion HALF_TYPE->TYPE with
1459 HALF_TYPE->INTERM_TYPE. */
1460 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1462 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1463 /* Check if the already created pattern stmt is what we need. */
1464 if (!is_gimple_assign (new_stmt)
1465 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1466 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1467 return false;
1469 stmts->safe_push (def_stmt);
1470 oprnd = gimple_assign_lhs (new_stmt);
1472 else
1474 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1475 oprnd = gimple_assign_rhs1 (def_stmt);
1476 new_oprnd = make_ssa_name (interm_type);
1477 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1478 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1479 stmts->safe_push (def_stmt);
1480 oprnd = new_oprnd;
1483 else
1485 /* Retrieve the operand before the type promotion. */
1486 oprnd = gimple_assign_rhs1 (def_stmt);
1489 else
1491 if (interm_type)
1493 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1494 new_oprnd = make_ssa_name (interm_type);
1495 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1496 oprnd = new_oprnd;
1497 *new_def_stmt = new_stmt;
1500 /* Otherwise, OPRND is already set. */
1503 if (interm_type)
1504 *new_type = interm_type;
1505 else
1506 *new_type = half_type;
1508 *op0 = oprnd;
1509 *op1 = fold_convert (*new_type, const_oprnd);
1511 return true;
1515 /* Try to find a statement or a sequence of statements that can be performed
1516 on a smaller type:
1518 type x_t;
1519 TYPE x_T, res0_T, res1_T;
1520 loop:
1521 S1 x_t = *p;
1522 S2 x_T = (TYPE) x_t;
1523 S3 res0_T = op (x_T, C0);
1524 S4 res1_T = op (res0_T, C1);
1525 S5 ... = () res1_T; - type demotion
1527 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1528 constants.
1529 Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1530 be 'type' or some intermediate type. For now, we expect S5 to be a type
1531 demotion operation. We also check that S3 and S4 have only one use. */
1533 static gimple *
1534 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1535 tree *type_in, tree *type_out)
1537 gimple *stmt = stmts->pop ();
1538 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1539 *use_stmt = NULL;
1540 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1541 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1542 bool first;
1543 tree type = NULL;
1545 first = true;
1546 while (1)
1548 if (!vinfo_for_stmt (stmt)
1549 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1550 return NULL;
1552 new_def_stmt = NULL;
1553 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1554 &op0, &op1, &new_def_stmt,
1555 stmts))
1557 if (first)
1558 return NULL;
1559 else
1560 break;
1563 /* STMT can be performed on a smaller type. Check its uses. */
1564 use_stmt = vect_single_imm_use (stmt);
1565 if (!use_stmt || !is_gimple_assign (use_stmt))
1566 return NULL;
1568 /* Create pattern statement for STMT. */
1569 vectype = get_vectype_for_scalar_type (new_type);
1570 if (!vectype)
1571 return NULL;
1573 /* We want to collect all the statements for which we create pattern
1574 statements, except for the case when the last statement in the
1575 sequence doesn't have a corresponding pattern statement. In such a
1576 case we associate the last pattern statement with the last statement
1577 in the sequence. Therefore, we only add the original statement to
1578 the list if we know that it is not the last. */
1579 if (prev_stmt)
1580 stmts->safe_push (prev_stmt);
1582 var = vect_recog_temp_ssa_var (new_type, NULL);
1583 pattern_stmt
1584 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1585 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1586 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1588 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location,
1591 "created pattern stmt: ");
1592 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1595 type = gimple_expr_type (stmt);
1596 prev_stmt = stmt;
1597 stmt = use_stmt;
1599 first = false;
1602 /* We got a sequence. We expect it to end with a type demotion operation.
1603 Otherwise, we quit (for now). There are three possible cases: the
1604 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1605 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1606 NEW_TYPE differs (we create a new conversion statement). */
1607 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1609 use_lhs = gimple_assign_lhs (use_stmt);
1610 use_type = TREE_TYPE (use_lhs);
1611 /* Support only type demotion or signedness change. */
1612 if (!INTEGRAL_TYPE_P (use_type)
1613 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1614 return NULL;
1616 /* Check that NEW_TYPE is not bigger than the conversion result. */
1617 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1618 return NULL;
1620 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1621 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1623 /* Create NEW_TYPE->USE_TYPE conversion. */
1624 new_oprnd = make_ssa_name (use_type);
1625 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1626 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1628 *type_in = get_vectype_for_scalar_type (new_type);
1629 *type_out = get_vectype_for_scalar_type (use_type);
1631 /* We created a pattern statement for the last statement in the
1632 sequence, so we don't need to associate it with the pattern
1633 statement created for PREV_STMT. Therefore, we add PREV_STMT
1634 to the list in order to mark it later in vect_pattern_recog_1. */
1635 if (prev_stmt)
1636 stmts->safe_push (prev_stmt);
1638 else
1640 if (prev_stmt)
1641 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1642 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1644 *type_in = vectype;
1645 *type_out = NULL_TREE;
1648 stmts->safe_push (use_stmt);
1650 else
1651 /* TODO: support general case, create a conversion to the correct type. */
1652 return NULL;
1654 /* Pattern detected. */
1655 if (dump_enabled_p ())
1657 dump_printf_loc (MSG_NOTE, vect_location,
1658 "vect_recog_over_widening_pattern: detected: ");
1659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1662 return pattern_stmt;
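
/* Illustrative addition, not part of the original file: a sequence of
   the rough shape vect_recog_over_widening_pattern looks for.  The
   function name and the choice of unsigned char / int for 'type' /
   TYPE are assumptions made for this example only.  */

static void
example_over_widening (unsigned char *dst, const unsigned char *src, int n)
{
  for (int i = 0; i < n; i++)
    /* S2: promotion to int, S3/S4: shift and bitwise-or with
       constants, S5: demotion back to unsigned char.  The operations
       can be carried out in a narrower intermediate type.  */
    dst[i] = (unsigned char) ((src[i] << 1) | 3);
}
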
1665 /* Detect widening shift pattern:
1667 type a_t;
1668 TYPE a_T, res_T;
1670 S1 a_t = ;
1671 S2 a_T = (TYPE) a_t;
1672 S3 res_T = a_T << CONST;
1674 where type 'TYPE' is at least double the size of type 'type'.
1676 Also detect cases where the shift result is immediately converted
1677 to another type 'result_type' that is no larger in size than 'TYPE'.
1678 In those cases we perform a widen-shift that directly results in
1679 'result_type', to avoid a possible over-widening situation:
1681 type a_t;
1682 TYPE a_T, res_T;
1683 result_type res_result;
1685 S1 a_t = ;
1686 S2 a_T = (TYPE) a_t;
1687 S3 res_T = a_T << CONST;
1688 S4 res_result = (result_type) res_T;
1689 '--> res_result' = a_t w<< CONST;
1691 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1692 create an additional pattern stmt for S2 to create a variable of an
1693 intermediate type, and perform widen-shift on the intermediate type:
1695 type a_t;
1696 interm_type a_it;
1697 TYPE a_T, res_T, res_T';
1699 S1 a_t = ;
1700 S2 a_T = (TYPE) a_t;
1701 '--> a_it = (interm_type) a_t;
1702 S3 res_T = a_T << CONST;
1703 '--> res_T' = a_it <<* CONST;
1705 Input/Output:
1707 * STMTS: Contains a stmt from which the pattern search begins.
1708 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1709 in STMTS. When an intermediate type is used and a pattern statement is
1710 created for S2, we also put S2 here (before S3).
1712 Output:
1714 * TYPE_IN: The type of the input arguments to the pattern.
1716 * TYPE_OUT: The type of the output of this pattern.
1718 * Return value: A new stmt that will be used to replace the sequence of
1719 stmts that constitute the pattern. In this case it will be:
1720 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1722 static gimple *
1723 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1724 tree *type_in, tree *type_out)
1726 gimple *last_stmt = stmts->pop ();
1727 gimple *def_stmt0;
1728 tree oprnd0, oprnd1;
1729 tree type, half_type0;
1730 gimple *pattern_stmt;
1731 tree vectype, vectype_out = NULL_TREE;
1732 tree var;
1733 enum tree_code dummy_code;
1734 int dummy_int;
1735 vec<tree> dummy_vec;
1736 gimple *use_stmt;
1737 bool promotion;
1739 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1740 return NULL;
1742 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1743 return NULL;
1745 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1746 return NULL;
1748 oprnd0 = gimple_assign_rhs1 (last_stmt);
1749 oprnd1 = gimple_assign_rhs2 (last_stmt);
1750 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1751 return NULL;
1753 /* Check operand 0: it has to be defined by a type promotion. */
1754 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1755 &promotion)
1756 || !promotion)
1757 return NULL;
1759 /* Check operand 1: it has to be positive. We check that it fits the type
1760 in vect_handle_widen_op_by_const (). */
1761 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1762 return NULL;
1764 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1765 type = gimple_expr_type (last_stmt);
1767 /* Check for subsequent conversion to another type. */
1768 use_stmt = vect_single_imm_use (last_stmt);
1769 if (use_stmt && is_gimple_assign (use_stmt)
1770 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1771 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1773 tree use_lhs = gimple_assign_lhs (use_stmt);
1774 tree use_type = TREE_TYPE (use_lhs);
1776 if (INTEGRAL_TYPE_P (use_type)
1777 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1779 last_stmt = use_stmt;
1780 type = use_type;
1784 /* Check if this is a widening operation. */
1785 gimple *wstmt = NULL;
1786 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1787 &oprnd0, &wstmt,
1788 type, &half_type0, def_stmt0))
1789 return NULL;
1791 /* Pattern detected. */
1792 if (dump_enabled_p ())
1793 dump_printf_loc (MSG_NOTE, vect_location,
1794 "vect_recog_widen_shift_pattern: detected:\n");
1796 /* Check target support. */
1797 vectype = get_vectype_for_scalar_type (half_type0);
1798 vectype_out = get_vectype_for_scalar_type (type);
1800 if (!vectype
1801 || !vectype_out
1802 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1803 vectype_out, vectype,
1804 &dummy_code, &dummy_code,
1805 &dummy_int, &dummy_vec))
1806 return NULL;
1808 *type_in = vectype;
1809 *type_out = vectype_out;
1811 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1812 var = vect_recog_temp_ssa_var (type, NULL);
1813 pattern_stmt
1814 = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1815 if (wstmt)
1817 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1818 new_pattern_def_seq (stmt_vinfo, wstmt);
1819 stmt_vec_info new_stmt_info
1820 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1821 set_vinfo_for_stmt (wstmt, new_stmt_info);
1822 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1825 if (dump_enabled_p ())
1826 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1828 stmts->safe_push (last_stmt);
1829 return pattern_stmt;
1832 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1834 type a_t, b_t, c_t;
1836 S0 a_t = b_t r<< c_t;
1838 Input/Output:
1840 * STMTS: Contains a stmt from which the pattern search begins,
1841 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1842 with a sequence:
1844 S1 d_t = -c_t;
1845 S2 e_t = d_t & (B - 1);
1846 S3 f_t = b_t << c_t;
1847 S4 g_t = b_t >> e_t;
1848 S0 a_t = f_t | g_t;
1850 where B is the element bitsize of type.
1852 Output:
1854 * TYPE_IN: The type of the input arguments to the pattern.
1856 * TYPE_OUT: The type of the output of this pattern.
1858 * Return value: A new stmt that will be used to replace the rotate
1859 S0 stmt. */
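/* Illustrative sketch (not part of GCC): the scalar expansion S1-S0 above for
   an 8-bit unsigned rotate-left by a variable amount, assuming the rotate
   count is less than the element bitsize (c_t < 8).  The function name is
   hypothetical.  */

static inline unsigned char
example_rotl8 (unsigned char b_t, unsigned char c_t)
{
  unsigned char d_t = -c_t;		/* S1 */
  unsigned char e_t = d_t & 7;		/* S2: B - 1 with B == 8.  */
  unsigned char f_t = b_t << c_t;	/* S3 */
  unsigned char g_t = b_t >> e_t;	/* S4 */
  return f_t | g_t;			/* S0 */
}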
1861 static gimple *
1862 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1864 gimple *last_stmt = stmts->pop ();
1865 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1866 gimple *pattern_stmt, *def_stmt;
1867 enum tree_code rhs_code;
1868 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1869 vec_info *vinfo = stmt_vinfo->vinfo;
1870 enum vect_def_type dt;
1871 optab optab1, optab2;
1872 edge ext_def = NULL;
1874 if (!is_gimple_assign (last_stmt))
1875 return NULL;
1877 rhs_code = gimple_assign_rhs_code (last_stmt);
1878 switch (rhs_code)
1880 case LROTATE_EXPR:
1881 case RROTATE_EXPR:
1882 break;
1883 default:
1884 return NULL;
1887 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1888 return NULL;
1890 lhs = gimple_assign_lhs (last_stmt);
1891 oprnd0 = gimple_assign_rhs1 (last_stmt);
1892 type = TREE_TYPE (oprnd0);
1893 oprnd1 = gimple_assign_rhs2 (last_stmt);
1894 if (TREE_CODE (oprnd0) != SSA_NAME
1895 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1896 || !INTEGRAL_TYPE_P (type)
1897 || !TYPE_UNSIGNED (type))
1898 return NULL;
1900 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1901 return NULL;
1903 if (dt != vect_internal_def
1904 && dt != vect_constant_def
1905 && dt != vect_external_def)
1906 return NULL;
1908 vectype = get_vectype_for_scalar_type (type);
1909 if (vectype == NULL_TREE)
1910 return NULL;
1912 /* If vector/vector or vector/scalar rotate is supported by the target,
1913 don't do anything here. */
1914 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1915 if (optab1
1916 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1917 return NULL;
1919 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1921 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1922 if (optab2
1923 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1924 return NULL;
1927 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1928 don't do anything here either. */
1929 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1930 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1931 if (!optab1
1932 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1933 || !optab2
1934 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1936 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1937 return NULL;
1938 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1939 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1940 if (!optab1
1941 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1942 || !optab2
1943 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1944 return NULL;
1947 *type_in = vectype;
1948 *type_out = vectype;
1949 if (*type_in == NULL_TREE)
1950 return NULL;
1952 if (dt == vect_external_def
1953 && TREE_CODE (oprnd1) == SSA_NAME
1954 && is_a <loop_vec_info> (vinfo))
1956 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1957 ext_def = loop_preheader_edge (loop);
1958 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1960 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1961 if (bb == NULL
1962 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1963 ext_def = NULL;
1967 def = NULL_TREE;
1968 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1969 if (TREE_CODE (oprnd1) == INTEGER_CST
1970 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1971 def = oprnd1;
1972 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1974 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1975 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1976 && TYPE_PRECISION (TREE_TYPE (rhs1))
1977 == TYPE_PRECISION (type))
1978 def = rhs1;
1981 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1982 if (def == NULL_TREE)
1984 def = vect_recog_temp_ssa_var (type, NULL);
1985 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1986 if (ext_def)
1988 basic_block new_bb
1989 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1990 gcc_assert (!new_bb);
1992 else
1993 append_pattern_def_seq (stmt_vinfo, def_stmt);
1995 stype = TREE_TYPE (def);
1996 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1998 if (TREE_CODE (def) == INTEGER_CST)
2000 if (!tree_fits_uhwi_p (def)
2001 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2002 || integer_zerop (def))
2003 return NULL;
2004 def2 = build_int_cst (stype,
2005 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2007 else
2009 tree vecstype = get_vectype_for_scalar_type (stype);
2010 stmt_vec_info def_stmt_vinfo;
2012 if (vecstype == NULL_TREE)
2013 return NULL;
2014 def2 = vect_recog_temp_ssa_var (stype, NULL);
2015 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2016 if (ext_def)
2018 basic_block new_bb
2019 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2020 gcc_assert (!new_bb);
2022 else
2024 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2025 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2026 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2027 append_pattern_def_seq (stmt_vinfo, def_stmt);
2030 def2 = vect_recog_temp_ssa_var (stype, NULL);
2031 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2032 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2033 gimple_assign_lhs (def_stmt), mask);
2034 if (ext_def)
2036 basic_block new_bb
2037 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2038 gcc_assert (!new_bb);
2040 else
2042 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2043 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2044 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2045 append_pattern_def_seq (stmt_vinfo, def_stmt);
2049 var1 = vect_recog_temp_ssa_var (type, NULL);
2050 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2051 ? LSHIFT_EXPR : RSHIFT_EXPR,
2052 oprnd0, def);
2053 append_pattern_def_seq (stmt_vinfo, def_stmt);
2055 var2 = vect_recog_temp_ssa_var (type, NULL);
2056 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2057 ? RSHIFT_EXPR : LSHIFT_EXPR,
2058 oprnd0, def2);
2059 append_pattern_def_seq (stmt_vinfo, def_stmt);
2061 /* Pattern detected. */
2062 if (dump_enabled_p ())
2063 dump_printf_loc (MSG_NOTE, vect_location,
2064 "vect_recog_rotate_pattern: detected:\n");
2066 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2067 var = vect_recog_temp_ssa_var (type, NULL);
2068 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2070 if (dump_enabled_p ())
2071 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2073 stmts->safe_push (last_stmt);
2074 return pattern_stmt;
2077 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2078 vectorized:
2080 type a_t;
2081 TYPE b_T, res_T;
2083 S1 a_t = ;
2084 S2 b_T = ;
2085 S3 res_T = b_T op a_t;
2087 where type 'TYPE' is a type with different size than 'type',
2088 and op is <<, >> or rotate.
2090 Also detect cases:
2092 type a_t;
2093 TYPE b_T, c_T, res_T;
2095 S0 c_T = ;
2096 S1 a_t = (type) c_T;
2097 S2 b_T = ;
2098 S3 res_T = b_T op a_t;
2100 Input/Output:
2102 * STMTS: Contains a stmt from which the pattern search begins,
2103 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2104 with a shift/rotate which has same type on both operands, in the
2105 second case just b_T op c_T, in the first case with added cast
2106 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2108 Output:
2110 * TYPE_IN: The type of the input arguments to the pattern.
2112 * TYPE_OUT: The type of the output of this pattern.
2114 * Return value: A new stmt that will be used to replace the shift/rotate
2115 S3 stmt. */
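/* Illustrative sketch (not part of GCC): the kind of scalar code the pattern
   matches -- a shift whose count has a narrower type than the shifted value.
   The recognizer rewrites it so both operands have the same type, allowing a
   vector-by-vector shift.  The function name and types are hypothetical, and
   a_t is assumed to be a valid shift count (< 32 here).  */

static inline unsigned int
example_vector_vector_shift (unsigned int b_T, unsigned char a_t)
{
  unsigned int a_T = (unsigned int) a_t;	/* Cast added in the def seq.  */
  return b_T << a_T;				/* S3 with same-typed operands.  */
}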
2117 static gimple *
2118 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2119 tree *type_in, tree *type_out)
2121 gimple *last_stmt = stmts->pop ();
2122 tree oprnd0, oprnd1, lhs, var;
2123 gimple *pattern_stmt, *def_stmt;
2124 enum tree_code rhs_code;
2125 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2126 vec_info *vinfo = stmt_vinfo->vinfo;
2127 enum vect_def_type dt;
2129 if (!is_gimple_assign (last_stmt))
2130 return NULL;
2132 rhs_code = gimple_assign_rhs_code (last_stmt);
2133 switch (rhs_code)
2135 case LSHIFT_EXPR:
2136 case RSHIFT_EXPR:
2137 case LROTATE_EXPR:
2138 case RROTATE_EXPR:
2139 break;
2140 default:
2141 return NULL;
2144 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2145 return NULL;
2147 lhs = gimple_assign_lhs (last_stmt);
2148 oprnd0 = gimple_assign_rhs1 (last_stmt);
2149 oprnd1 = gimple_assign_rhs2 (last_stmt);
2150 if (TREE_CODE (oprnd0) != SSA_NAME
2151 || TREE_CODE (oprnd1) != SSA_NAME
2152 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2153 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2154 || TYPE_PRECISION (TREE_TYPE (lhs))
2155 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2156 return NULL;
2158 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2159 return NULL;
2161 if (dt != vect_internal_def)
2162 return NULL;
2164 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2165 *type_out = *type_in;
2166 if (*type_in == NULL_TREE)
2167 return NULL;
2169 tree def = NULL_TREE;
2170 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2171 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2173 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2174 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2175 && TYPE_PRECISION (TREE_TYPE (rhs1))
2176 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2178 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2179 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2180 def = rhs1;
2181 else
2183 tree mask
2184 = build_low_bits_mask (TREE_TYPE (rhs1),
2185 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2186 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2187 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2188 new_pattern_def_seq (stmt_vinfo, def_stmt);
2193 if (def == NULL_TREE)
2195 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2196 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2197 new_pattern_def_seq (stmt_vinfo, def_stmt);
2200 /* Pattern detected. */
2201 if (dump_enabled_p ())
2202 dump_printf_loc (MSG_NOTE, vect_location,
2203 "vect_recog_vector_vector_shift_pattern: detected:\n");
2205 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2206 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2207 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2209 if (dump_enabled_p ())
2210 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2212 stmts->safe_push (last_stmt);
2213 return pattern_stmt;
2216 /* Return true iff the target has a vector optab implementing the operation
2217 CODE on type VECTYPE. */
2219 static bool
2220 target_has_vecop_for_code (tree_code code, tree vectype)
2222 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2223 return voptab
2224 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2227 /* Verify that the target has optabs of VECTYPE to perform all the steps
2228 needed by the multiplication-by-immediate synthesis algorithm described by
2229 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2230 present. Return true iff the target supports all the steps. */
2232 static bool
2233 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2234 tree vectype, bool synth_shift_p)
2236 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2237 return false;
2239 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2240 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2242 if (var == negate_variant
2243 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2244 return false;
2246 /* If we must synthesize shifts with additions make sure that vector
2247 addition is available. */
2248 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2249 return false;
2251 for (int i = 1; i < alg->ops; i++)
2253 switch (alg->op[i])
2255 case alg_shift:
2256 break;
2257 case alg_add_t_m2:
2258 case alg_add_t2_m:
2259 case alg_add_factor:
2260 if (!supports_vplus)
2261 return false;
2262 break;
2263 case alg_sub_t_m2:
2264 case alg_sub_t2_m:
2265 case alg_sub_factor:
2266 if (!supports_vminus)
2267 return false;
2268 break;
2269 case alg_unknown:
2270 case alg_m:
2271 case alg_zero:
2272 case alg_impossible:
2273 return false;
2274 default:
2275 gcc_unreachable ();
2279 return true;
2282 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2283 putting the final result in DEST. Append all statements but the last into
2284 VINFO. Return the last statement. */
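/* Illustrative sketch (not part of GCC): the scalar equivalent of what this
   helper emits for a shift by 3 -- three self-additions, useful when the
   target has vector addition but no vector shift.  The function name is
   hypothetical.  */

static inline unsigned int
example_lshift_by_additions (unsigned int x)
{
  x = x + x;	/* x << 1 */
  x = x + x;	/* x << 2 */
  x = x + x;	/* x << 3 */
  return x;
}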
2286 static gimple *
2287 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2288 stmt_vec_info vinfo)
2290 HOST_WIDE_INT i;
2291 tree itype = TREE_TYPE (op);
2292 tree prev_res = op;
2293 gcc_assert (amnt >= 0);
2294 for (i = 0; i < amnt; i++)
2296 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2297 : dest;
2298 gimple *stmt
2299 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2300 prev_res = tmp_var;
2301 if (i < amnt - 1)
2302 append_pattern_def_seq (vinfo, stmt);
2303 else
2304 return stmt;
2306 gcc_unreachable ();
2307 return NULL;
2310 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2311 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2312 the process if necessary. Append the resulting assignment statements
2313 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2314 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2315 left shifts using additions. */
2317 static tree
2318 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2319 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2321 if (integer_zerop (op2)
2322 && (code == LSHIFT_EXPR
2323 || code == PLUS_EXPR))
2325 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2326 return op1;
2329 gimple *stmt;
2330 tree itype = TREE_TYPE (op1);
2331 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2333 if (code == LSHIFT_EXPR
2334 && synth_shift_p)
2336 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2337 stmt_vinfo);
2338 append_pattern_def_seq (stmt_vinfo, stmt);
2339 return tmp_var;
2342 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2343 append_pattern_def_seq (stmt_vinfo, stmt);
2344 return tmp_var;
2347 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2348 and simple arithmetic operations to be vectorized. Record the statements
2349 produced in STMT_VINFO and return the last statement in the sequence or
2350 NULL if it's not possible to synthesize such a multiplication.
2351 This function mirrors the behavior of expand_mult_const in expmed.c but
2352 works on tree-ssa form. */
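/* Illustrative sketch (not part of GCC): the shape of sequence this function
   produces, here for a multiplication by 9 decomposed as (x << 3) + x,
   i.e. a single alg_add_t_m2 step (the operand shifted and added to the
   accumulator).  The function name is hypothetical.  */

static inline unsigned int
example_synth_mult_by_9 (unsigned int x)
{
  unsigned int t = x << 3;	/* Shift step (or additions, see above).  */
  return t + x;			/* Add step: x * 9.  */
}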
2354 static gimple *
2355 vect_synth_mult_by_constant (tree op, tree val,
2356 stmt_vec_info stmt_vinfo)
2358 tree itype = TREE_TYPE (op);
2359 machine_mode mode = TYPE_MODE (itype);
2360 struct algorithm alg;
2361 mult_variant variant;
2362 if (!tree_fits_shwi_p (val))
2363 return NULL;
2365 /* Multiplication synthesis by shifts, adds and subs can introduce
2366 signed overflow where the original operation didn't. Perform the
2367 operations on an unsigned type and cast back to avoid this.
2368 In the future we may want to relax this for synthesis algorithms
2369 that we can prove do not cause unexpected overflow. */
2370 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2372 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2374 /* Targets that don't support vector shifts but support vector additions
2375 can synthesize shifts that way. */
2376 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2378 HOST_WIDE_INT hwval = tree_to_shwi (val);
2379 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2380 The vectorizer's benefit analysis will decide whether it's beneficial
2381 to do this. */
2382 bool possible = choose_mult_variant (mode, hwval, &alg,
2383 &variant, MAX_COST);
2384 if (!possible)
2385 return NULL;
2387 tree vectype = get_vectype_for_scalar_type (multtype);
2389 if (!vectype
2390 || !target_supports_mult_synth_alg (&alg, variant,
2391 vectype, synth_shift_p))
2392 return NULL;
2394 tree accumulator;
2396 /* Clear out the sequence of statements so we can populate it below. */
2397 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2398 gimple *stmt = NULL;
2400 if (cast_to_unsigned_p)
2402 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2403 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2404 append_pattern_def_seq (stmt_vinfo, stmt);
2405 op = tmp_op;
2408 if (alg.op[0] == alg_zero)
2409 accumulator = build_int_cst (multtype, 0);
2410 else
2411 accumulator = op;
2413 bool needs_fixup = (variant == negate_variant)
2414 || (variant == add_variant);
2416 for (int i = 1; i < alg.ops; i++)
2418 tree shft_log = build_int_cst (multtype, alg.log[i]);
2419 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2420 tree tmp_var = NULL_TREE;
2422 switch (alg.op[i])
2424 case alg_shift:
2425 if (synth_shift_p)
2426 stmt
2427 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2428 stmt_vinfo);
2429 else
2430 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2431 shft_log);
2432 break;
2433 case alg_add_t_m2:
2434 tmp_var
2435 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2436 stmt_vinfo, synth_shift_p);
2437 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2438 tmp_var);
2439 break;
2440 case alg_sub_t_m2:
2441 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2442 shft_log, stmt_vinfo,
2443 synth_shift_p);
2444 /* In some algorithms the first step involves zeroing the
2445 accumulator. If subtracting from such an accumulator
2446 just emit the negation directly. */
2447 if (integer_zerop (accumulator))
2448 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2449 else
2450 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2451 tmp_var);
2452 break;
2453 case alg_add_t2_m:
2454 tmp_var
2455 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2456 stmt_vinfo, synth_shift_p);
2457 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2458 break;
2459 case alg_sub_t2_m:
2460 tmp_var
2461 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2462 stmt_vinfo, synth_shift_p);
2463 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2464 break;
2465 case alg_add_factor:
2466 tmp_var
2467 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2468 stmt_vinfo, synth_shift_p);
2469 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2470 tmp_var);
2471 break;
2472 case alg_sub_factor:
2473 tmp_var
2474 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2475 stmt_vinfo, synth_shift_p);
2476 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2477 accumulator);
2478 break;
2479 default:
2480 gcc_unreachable ();
2482 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2483 but rather return it directly. */
2485 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2486 append_pattern_def_seq (stmt_vinfo, stmt);
2487 accumulator = accum_tmp;
2489 if (variant == negate_variant)
2491 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2492 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2493 accumulator = accum_tmp;
2494 if (cast_to_unsigned_p)
2495 append_pattern_def_seq (stmt_vinfo, stmt);
2497 else if (variant == add_variant)
2499 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2500 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2501 accumulator = accum_tmp;
2502 if (cast_to_unsigned_p)
2503 append_pattern_def_seq (stmt_vinfo, stmt);
2505 /* Move back to the signed type if needed. */
2506 if (cast_to_unsigned_p)
2508 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2509 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2512 return stmt;
2515 /* Detect multiplication by a constant and convert it into a sequence of
2516 shifts, additions, subtractions and negations. We reuse the
2517 choose_mult_variant algorithms from expmed.c.
2519 Input/Output:
2521 STMTS: Contains a stmt from which the pattern search begins,
2522 i.e. the mult stmt.
2524 Output:
2526 * TYPE_IN: The type of the input arguments to the pattern.
2528 * TYPE_OUT: The type of the output of this pattern.
2530 * Return value: A new stmt that will be used to replace
2531 the multiplication. */
2533 static gimple *
2534 vect_recog_mult_pattern (vec<gimple *> *stmts,
2535 tree *type_in, tree *type_out)
2537 gimple *last_stmt = stmts->pop ();
2538 tree oprnd0, oprnd1, vectype, itype;
2539 gimple *pattern_stmt;
2540 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2542 if (!is_gimple_assign (last_stmt))
2543 return NULL;
2545 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2546 return NULL;
2548 oprnd0 = gimple_assign_rhs1 (last_stmt);
2549 oprnd1 = gimple_assign_rhs2 (last_stmt);
2550 itype = TREE_TYPE (oprnd0);
2552 if (TREE_CODE (oprnd0) != SSA_NAME
2553 || TREE_CODE (oprnd1) != INTEGER_CST
2554 || !INTEGRAL_TYPE_P (itype)
2555 || !type_has_mode_precision_p (itype))
2556 return NULL;
2558 vectype = get_vectype_for_scalar_type (itype);
2559 if (vectype == NULL_TREE)
2560 return NULL;
2562 /* If the target can handle vectorized multiplication natively,
2563 don't attempt to optimize this. */
2564 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2565 if (mul_optab != unknown_optab)
2567 machine_mode vec_mode = TYPE_MODE (vectype);
2568 int icode = (int) optab_handler (mul_optab, vec_mode);
2569 if (icode != CODE_FOR_nothing)
2570 return NULL;
2573 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2574 if (!pattern_stmt)
2575 return NULL;
2577 /* Pattern detected. */
2578 if (dump_enabled_p ())
2579 dump_printf_loc (MSG_NOTE, vect_location,
2580 "vect_recog_mult_pattern: detected:\n");
2582 if (dump_enabled_p ())
2583 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2584 pattern_stmt,0);
2586 stmts->safe_push (last_stmt);
2587 *type_in = vectype;
2588 *type_out = vectype;
2590 return pattern_stmt;
2593 /* Detect a signed division by a constant that wouldn't otherwise
2594 be vectorized:
2596 type a_t, b_t;
2598 S1 a_t = b_t / N;
2600 where type 'type' is an integral type and N is a constant.
2602 Similarly handle modulo by a constant:
2604 S4 a_t = b_t % N;
2606 Input/Output:
2608 * STMTS: Contains a stmt from which the pattern search begins,
2609 i.e. the division stmt. If N is a power of two constant and the
2610 type is signed, S1 is replaced by:
2611 S3 y_t = b_t < 0 ? N - 1 : 0;
2612 S2 x_t = b_t + y_t;
2613 S1' a_t = x_t >> log2 (N);
2615 If N is a power of two constant and the type is signed, S4 is
2616 replaced by (where *_T temporaries have unsigned type):
2617 S9 y_T = b_t < 0 ? -1U : 0U;
2618 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2619 S7 z_t = (type) z_T;
2620 S6 w_t = b_t + z_t;
2621 S5 x_t = w_t & (N - 1);
2622 S4' a_t = x_t - z_t;
2624 Output:
2626 * TYPE_IN: The type of the input arguments to the pattern.
2628 * TYPE_OUT: The type of the output of this pattern.
2630 * Return value: A new stmt that will be used to replace the division
2631 S1 or modulo S4 stmt. */
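/* Illustrative sketch (not part of GCC): the scalar rewrite S3-S1' above for
   signed division by a power of two, here N == 8.  Adding the bias before
   the right shift makes it round towards zero like TRUNC_DIV_EXPR.  The
   function name is hypothetical, and the >> on a signed value relies on
   arithmetic shift semantics as GCC provides.  */

static inline int
example_sdiv_by_8 (int b_t)
{
  int y_t = b_t < 0 ? 8 - 1 : 0;	/* S3 */
  int x_t = b_t + y_t;			/* S2 */
  return x_t >> 3;			/* S1': log2 (8) == 3.  */
}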
2633 static gimple *
2634 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2635 tree *type_in, tree *type_out)
2637 gimple *last_stmt = stmts->pop ();
2638 tree oprnd0, oprnd1, vectype, itype, cond;
2639 gimple *pattern_stmt, *def_stmt;
2640 enum tree_code rhs_code;
2641 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2642 vec_info *vinfo = stmt_vinfo->vinfo;
2643 optab optab;
2644 tree q;
2645 int dummy_int, prec;
2646 stmt_vec_info def_stmt_vinfo;
2648 if (!is_gimple_assign (last_stmt))
2649 return NULL;
2651 rhs_code = gimple_assign_rhs_code (last_stmt);
2652 switch (rhs_code)
2654 case TRUNC_DIV_EXPR:
2655 case TRUNC_MOD_EXPR:
2656 break;
2657 default:
2658 return NULL;
2661 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2662 return NULL;
2664 oprnd0 = gimple_assign_rhs1 (last_stmt);
2665 oprnd1 = gimple_assign_rhs2 (last_stmt);
2666 itype = TREE_TYPE (oprnd0);
2667 if (TREE_CODE (oprnd0) != SSA_NAME
2668 || TREE_CODE (oprnd1) != INTEGER_CST
2669 || TREE_CODE (itype) != INTEGER_TYPE
2670 || !type_has_mode_precision_p (itype))
2671 return NULL;
2673 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2674 vectype = get_vectype_for_scalar_type (itype);
2675 if (vectype == NULL_TREE)
2676 return NULL;
2678 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2680 /* If the target can handle vectorized division or modulo natively,
2681 don't attempt to optimize this, since native division is likely
2682 to give smaller code. */
2683 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2684 if (optab != unknown_optab)
2686 machine_mode vec_mode = TYPE_MODE (vectype);
2687 int icode = (int) optab_handler (optab, vec_mode);
2688 if (icode != CODE_FOR_nothing)
2689 return NULL;
2693 prec = TYPE_PRECISION (itype);
2694 if (integer_pow2p (oprnd1))
2696 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2697 return NULL;
2699 /* Pattern detected. */
2700 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_NOTE, vect_location,
2702 "vect_recog_divmod_pattern: detected:\n");
2704 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2705 build_int_cst (itype, 0));
2706 if (rhs_code == TRUNC_DIV_EXPR)
2708 tree var = vect_recog_temp_ssa_var (itype, NULL);
2709 tree shift;
2710 def_stmt
2711 = gimple_build_assign (var, COND_EXPR, cond,
2712 fold_build2 (MINUS_EXPR, itype, oprnd1,
2713 build_int_cst (itype, 1)),
2714 build_int_cst (itype, 0));
2715 new_pattern_def_seq (stmt_vinfo, def_stmt);
2716 var = vect_recog_temp_ssa_var (itype, NULL);
2717 def_stmt
2718 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2719 gimple_assign_lhs (def_stmt));
2720 append_pattern_def_seq (stmt_vinfo, def_stmt);
2722 shift = build_int_cst (itype, tree_log2 (oprnd1));
2723 pattern_stmt
2724 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2725 RSHIFT_EXPR, var, shift);
2727 else
2729 tree signmask;
2730 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2731 if (compare_tree_int (oprnd1, 2) == 0)
2733 signmask = vect_recog_temp_ssa_var (itype, NULL);
2734 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2735 build_int_cst (itype, 1),
2736 build_int_cst (itype, 0));
2737 append_pattern_def_seq (stmt_vinfo, def_stmt);
2739 else
2741 tree utype
2742 = build_nonstandard_integer_type (prec, 1);
2743 tree vecutype = get_vectype_for_scalar_type (utype);
2744 tree shift
2745 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2746 - tree_log2 (oprnd1));
2747 tree var = vect_recog_temp_ssa_var (utype, NULL);
2749 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2750 build_int_cst (utype, -1),
2751 build_int_cst (utype, 0));
2752 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2753 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2754 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2755 append_pattern_def_seq (stmt_vinfo, def_stmt);
2756 var = vect_recog_temp_ssa_var (utype, NULL);
2757 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2758 gimple_assign_lhs (def_stmt),
2759 shift);
2760 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2761 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2762 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2763 append_pattern_def_seq (stmt_vinfo, def_stmt);
2764 signmask = vect_recog_temp_ssa_var (itype, NULL);
2765 def_stmt
2766 = gimple_build_assign (signmask, NOP_EXPR, var);
2767 append_pattern_def_seq (stmt_vinfo, def_stmt);
2769 def_stmt
2770 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2771 PLUS_EXPR, oprnd0, signmask);
2772 append_pattern_def_seq (stmt_vinfo, def_stmt);
2773 def_stmt
2774 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2775 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2776 fold_build2 (MINUS_EXPR, itype, oprnd1,
2777 build_int_cst (itype, 1)));
2778 append_pattern_def_seq (stmt_vinfo, def_stmt);
2780 pattern_stmt
2781 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2782 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2783 signmask);
2786 if (dump_enabled_p ())
2787 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2790 stmts->safe_push (last_stmt);
2792 *type_in = vectype;
2793 *type_out = vectype;
2794 return pattern_stmt;
2797 if (prec > HOST_BITS_PER_WIDE_INT
2798 || integer_zerop (oprnd1))
2799 return NULL;
2801 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2802 return NULL;
2804 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2806 if (TYPE_UNSIGNED (itype))
2808 unsigned HOST_WIDE_INT mh, ml;
2809 int pre_shift, post_shift;
2810 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2811 & GET_MODE_MASK (itype_mode));
2812 tree t1, t2, t3, t4;
2814 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2815 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2816 return NULL;
2818 /* Find a suitable multiplier and right shift count
2819 instead of multiplying with D. */
2820 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2822 /* If the suggested multiplier is more than SIZE bits, we can do better
2823 for even divisors, using an initial right shift. */
2824 if (mh != 0 && (d & 1) == 0)
2826 pre_shift = ctz_or_zero (d);
2827 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2828 &ml, &post_shift, &dummy_int);
2829 gcc_assert (!mh);
2831 else
2832 pre_shift = 0;
2834 if (mh != 0)
2836 if (post_shift - 1 >= prec)
2837 return NULL;
2839 /* t1 = oprnd0 h* ml;
2840 t2 = oprnd0 - t1;
2841 t3 = t2 >> 1;
2842 t4 = t1 + t3;
2843 q = t4 >> (post_shift - 1); */
2844 t1 = vect_recog_temp_ssa_var (itype, NULL);
2845 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2846 build_int_cst (itype, ml));
2847 append_pattern_def_seq (stmt_vinfo, def_stmt);
2849 t2 = vect_recog_temp_ssa_var (itype, NULL);
2850 def_stmt
2851 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2852 append_pattern_def_seq (stmt_vinfo, def_stmt);
2854 t3 = vect_recog_temp_ssa_var (itype, NULL);
2855 def_stmt
2856 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2857 append_pattern_def_seq (stmt_vinfo, def_stmt);
2859 t4 = vect_recog_temp_ssa_var (itype, NULL);
2860 def_stmt
2861 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2863 if (post_shift != 1)
2865 append_pattern_def_seq (stmt_vinfo, def_stmt);
2867 q = vect_recog_temp_ssa_var (itype, NULL);
2868 pattern_stmt
2869 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2870 build_int_cst (itype, post_shift - 1));
2872 else
2874 q = t4;
2875 pattern_stmt = def_stmt;
2878 else
2880 if (pre_shift >= prec || post_shift >= prec)
2881 return NULL;
2883 /* t1 = oprnd0 >> pre_shift;
2884 t2 = t1 h* ml;
2885 q = t2 >> post_shift; */
2886 if (pre_shift)
2888 t1 = vect_recog_temp_ssa_var (itype, NULL);
2889 def_stmt
2890 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2891 build_int_cst (NULL, pre_shift));
2892 append_pattern_def_seq (stmt_vinfo, def_stmt);
2894 else
2895 t1 = oprnd0;
2897 t2 = vect_recog_temp_ssa_var (itype, NULL);
2898 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2899 build_int_cst (itype, ml));
2901 if (post_shift)
2903 append_pattern_def_seq (stmt_vinfo, def_stmt);
2905 q = vect_recog_temp_ssa_var (itype, NULL);
2906 def_stmt
2907 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2908 build_int_cst (itype, post_shift));
2910 else
2911 q = t2;
2913 pattern_stmt = def_stmt;
2916 else
2918 unsigned HOST_WIDE_INT ml;
2919 int post_shift;
2920 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2921 unsigned HOST_WIDE_INT abs_d;
2922 bool add = false;
2923 tree t1, t2, t3, t4;
2925 /* Give up for -1. */
2926 if (d == -1)
2927 return NULL;
2929 /* Since d might be INT_MIN, we have to cast to
2930 unsigned HOST_WIDE_INT before negating to avoid
2931 undefined signed overflow. */
2932 abs_d = (d >= 0
2933 ? (unsigned HOST_WIDE_INT) d
2934 : - (unsigned HOST_WIDE_INT) d);
2936 /* n rem d = n rem -d */
2937 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2939 d = abs_d;
2940 oprnd1 = build_int_cst (itype, abs_d);
2942 else if (HOST_BITS_PER_WIDE_INT >= prec
2943 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2944 /* This case is not handled correctly below. */
2945 return NULL;
2947 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2948 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2950 add = true;
2951 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2953 if (post_shift >= prec)
2954 return NULL;
2956 /* t1 = oprnd0 h* ml; */
2957 t1 = vect_recog_temp_ssa_var (itype, NULL);
2958 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2959 build_int_cst (itype, ml));
2961 if (add)
2963 /* t2 = t1 + oprnd0; */
2964 append_pattern_def_seq (stmt_vinfo, def_stmt);
2965 t2 = vect_recog_temp_ssa_var (itype, NULL);
2966 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2968 else
2969 t2 = t1;
2971 if (post_shift)
2973 /* t3 = t2 >> post_shift; */
2974 append_pattern_def_seq (stmt_vinfo, def_stmt);
2975 t3 = vect_recog_temp_ssa_var (itype, NULL);
2976 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2977 build_int_cst (itype, post_shift));
2979 else
2980 t3 = t2;
2982 wide_int oprnd0_min, oprnd0_max;
2983 int msb = 1;
2984 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2986 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2987 msb = 0;
2988 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2989 msb = -1;
2992 if (msb == 0 && d >= 0)
2994 /* q = t3; */
2995 q = t3;
2996 pattern_stmt = def_stmt;
2998 else
3000 /* t4 = oprnd0 >> (prec - 1);
3001 or if we know from VRP that oprnd0 >= 0
3002 t4 = 0;
3003 or if we know from VRP that oprnd0 < 0
3004 t4 = -1; */
3005 append_pattern_def_seq (stmt_vinfo, def_stmt);
3006 t4 = vect_recog_temp_ssa_var (itype, NULL);
3007 if (msb != 1)
3008 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3009 build_int_cst (itype, msb));
3010 else
3011 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3012 build_int_cst (itype, prec - 1));
3013 append_pattern_def_seq (stmt_vinfo, def_stmt);
3015 /* q = t3 - t4; or q = t4 - t3; */
3016 q = vect_recog_temp_ssa_var (itype, NULL);
3017 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3018 d < 0 ? t3 : t4);
3022 if (rhs_code == TRUNC_MOD_EXPR)
3024 tree r, t1;
3026 /* We divided. Now finish by:
3027 t1 = q * oprnd1;
3028 r = oprnd0 - t1; */
3029 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3031 t1 = vect_recog_temp_ssa_var (itype, NULL);
3032 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3033 append_pattern_def_seq (stmt_vinfo, def_stmt);
3035 r = vect_recog_temp_ssa_var (itype, NULL);
3036 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3039 /* Pattern detected. */
3040 if (dump_enabled_p ())
3042 dump_printf_loc (MSG_NOTE, vect_location,
3043 "vect_recog_divmod_pattern: detected: ");
3044 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3047 stmts->safe_push (last_stmt);
3049 *type_in = vectype;
3050 *type_out = vectype;
3051 return pattern_stmt;
3054 /* Function vect_recog_mixed_size_cond_pattern
3056 Try to find the following pattern:
3058 type x_t, y_t;
3059 TYPE a_T, b_T, c_T;
3060 loop:
3061 S1 a_T = x_t CMP y_t ? b_T : c_T;
3063 where type 'TYPE' is an integral type which has different size
3064 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3065 than 'type', the constants need to fit into an integer type
3066 with the same width as 'type') or results of conversion from 'type'.
3068 Input:
3070 * LAST_STMT: A stmt from which the pattern search begins.
3072 Output:
3074 * TYPE_IN: The type of the input arguments to the pattern.
3076 * TYPE_OUT: The type of the output of this pattern.
3078 * Return value: A new stmt that will be used to replace the pattern.
3079 Additionally a def_stmt is added.
3081 a_it = x_t CMP y_t ? b_it : c_it;
3082 a_T = (TYPE) a_it; */
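/* Illustrative sketch (not part of GCC): the shape of the split above -- the
   COND_EXPR is evaluated in the comparison's width and only its result is
   widened, instead of widening the operands first.  The function name and
   the 16-bit/32-bit types are hypothetical.  */

static inline int
example_mixed_size_cond (short x_t, short y_t)
{
  short a_it = x_t < y_t ? 5 : 7;	/* a_it = x_t CMP y_t ? b_it : c_it;  */
  return (int) a_it;			/* a_T = (TYPE) a_it;  */
}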
3084 static gimple *
3085 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3086 tree *type_out)
3088 gimple *last_stmt = (*stmts)[0];
3089 tree cond_expr, then_clause, else_clause;
3090 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3091 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3092 gimple *pattern_stmt, *def_stmt;
3093 vec_info *vinfo = stmt_vinfo->vinfo;
3094 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3095 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3096 bool promotion;
3097 tree comp_scalar_type;
3099 if (!is_gimple_assign (last_stmt)
3100 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3101 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3102 return NULL;
3104 cond_expr = gimple_assign_rhs1 (last_stmt);
3105 then_clause = gimple_assign_rhs2 (last_stmt);
3106 else_clause = gimple_assign_rhs3 (last_stmt);
3108 if (!COMPARISON_CLASS_P (cond_expr))
3109 return NULL;
3111 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3112 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3113 if (comp_vectype == NULL_TREE)
3114 return NULL;
3116 type = gimple_expr_type (last_stmt);
3117 if (types_compatible_p (type, comp_scalar_type)
3118 || ((TREE_CODE (then_clause) != INTEGER_CST
3119 || TREE_CODE (else_clause) != INTEGER_CST)
3120 && !INTEGRAL_TYPE_P (comp_scalar_type))
3121 || !INTEGRAL_TYPE_P (type))
3122 return NULL;
3124 if ((TREE_CODE (then_clause) != INTEGER_CST
3125 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3126 &def_stmt0, &promotion))
3127 || (TREE_CODE (else_clause) != INTEGER_CST
3128 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3129 &def_stmt1, &promotion)))
3130 return NULL;
3132 if (orig_type0 && orig_type1
3133 && !types_compatible_p (orig_type0, orig_type1))
3134 return NULL;
3136 if (orig_type0)
3138 if (!types_compatible_p (orig_type0, comp_scalar_type))
3139 return NULL;
3140 then_clause = gimple_assign_rhs1 (def_stmt0);
3141 itype = orig_type0;
3144 if (orig_type1)
3146 if (!types_compatible_p (orig_type1, comp_scalar_type))
3147 return NULL;
3148 else_clause = gimple_assign_rhs1 (def_stmt1);
3149 itype = orig_type1;
3153 HOST_WIDE_INT cmp_mode_size
3154 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3156 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3157 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3158 return NULL;
3160 vectype = get_vectype_for_scalar_type (type);
3161 if (vectype == NULL_TREE)
3162 return NULL;
3164 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3165 return NULL;
3167 if (itype == NULL_TREE)
3168 itype = build_nonstandard_integer_type (cmp_mode_size,
3169 TYPE_UNSIGNED (type));
3171 if (itype == NULL_TREE
3172 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3173 return NULL;
3175 vecitype = get_vectype_for_scalar_type (itype);
3176 if (vecitype == NULL_TREE)
3177 return NULL;
3179 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3180 return NULL;
3182 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3184 if ((TREE_CODE (then_clause) == INTEGER_CST
3185 && !int_fits_type_p (then_clause, itype))
3186 || (TREE_CODE (else_clause) == INTEGER_CST
3187 && !int_fits_type_p (else_clause, itype)))
3188 return NULL;
3191 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3192 COND_EXPR, unshare_expr (cond_expr),
3193 fold_convert (itype, then_clause),
3194 fold_convert (itype, else_clause));
3195 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3196 NOP_EXPR, gimple_assign_lhs (def_stmt));
3198 new_pattern_def_seq (stmt_vinfo, def_stmt);
3199 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3200 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3201 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3202 *type_in = vecitype;
3203 *type_out = vectype;
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_NOTE, vect_location,
3207 "vect_recog_mixed_size_cond_pattern: detected:\n");
3209 return pattern_stmt;
3213 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3214 true if bool VAR can and should be optimized that way. Assume it shouldn't
3215 in case it's a result of a comparison which can be directly vectorized into
3216 a vector comparison. Fills in STMTS with all stmts visited during the
3217 walk. */
3219 static bool
3220 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3222 gimple *def_stmt;
3223 enum vect_def_type dt;
3224 tree rhs1;
3225 enum tree_code rhs_code;
3227 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3228 return false;
3230 if (dt != vect_internal_def)
3231 return false;
3233 if (!is_gimple_assign (def_stmt))
3234 return false;
3236 if (stmts.contains (def_stmt))
3237 return true;
3239 rhs1 = gimple_assign_rhs1 (def_stmt);
3240 rhs_code = gimple_assign_rhs_code (def_stmt);
3241 switch (rhs_code)
3243 case SSA_NAME:
3244 if (! check_bool_pattern (rhs1, vinfo, stmts))
3245 return false;
3246 break;
3248 CASE_CONVERT:
3249 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3250 return false;
3251 if (! check_bool_pattern (rhs1, vinfo, stmts))
3252 return false;
3253 break;
3255 case BIT_NOT_EXPR:
3256 if (! check_bool_pattern (rhs1, vinfo, stmts))
3257 return false;
3258 break;
3260 case BIT_AND_EXPR:
3261 case BIT_IOR_EXPR:
3262 case BIT_XOR_EXPR:
3263 if (! check_bool_pattern (rhs1, vinfo, stmts)
3264 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3265 return false;
3266 break;
3268 default:
3269 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3271 tree vecitype, comp_vectype;
3273 /* If the comparison can throw, then is_gimple_condexpr will be
3274 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3275 if (stmt_could_throw_p (def_stmt))
3276 return false;
3278 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3279 if (comp_vectype == NULL_TREE)
3280 return false;
3282 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3283 if (mask_type
3284 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3285 return false;
3287 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3289 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3290 tree itype
3291 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3292 vecitype = get_vectype_for_scalar_type (itype);
3293 if (vecitype == NULL_TREE)
3294 return false;
3296 else
3297 vecitype = comp_vectype;
3298 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3299 return false;
3301 else
3302 return false;
3303 break;
3306 bool res = stmts.add (def_stmt);
3307 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3308 gcc_assert (!res);
3310 return true;
3314 /* Helper function of adjust_bool_pattern. Cast the result of a previous
3315 stmt (SSA_NAME_DEF_STMT of VAR) to TYPE, adding the cast stmt to
3316 STMT_INFO's pattern def sequence. */
3318 static tree
3319 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3321 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3322 NOP_EXPR, var);
3323 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3324 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3325 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3326 append_pattern_def_seq (stmt_info, cast_stmt);
3327 return gimple_assign_lhs (cast_stmt);
3330 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3331 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3332 type, OUT_TYPE is the desired final integer type of the whole pattern.
3333 STMT_INFO is the info of the pattern root, which the pattern stmts should
3334 be associated with. DEFS is a map of pattern defs. */
3336 static void
3337 adjust_bool_pattern (tree var, tree out_type,
3338 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3340 gimple *stmt = SSA_NAME_DEF_STMT (var);
3341 enum tree_code rhs_code, def_rhs_code;
3342 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3343 location_t loc;
3344 gimple *pattern_stmt, *def_stmt;
3345 tree trueval = NULL_TREE;
3347 rhs1 = gimple_assign_rhs1 (stmt);
3348 rhs2 = gimple_assign_rhs2 (stmt);
3349 rhs_code = gimple_assign_rhs_code (stmt);
3350 loc = gimple_location (stmt);
3351 switch (rhs_code)
3353 case SSA_NAME:
3354 CASE_CONVERT:
3355 irhs1 = *defs.get (rhs1);
3356 itype = TREE_TYPE (irhs1);
3357 pattern_stmt
3358 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3359 SSA_NAME, irhs1);
3360 break;
3362 case BIT_NOT_EXPR:
3363 irhs1 = *defs.get (rhs1);
3364 itype = TREE_TYPE (irhs1);
3365 pattern_stmt
3366 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3367 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3368 break;
3370 case BIT_AND_EXPR:
3371 /* Try to optimize x = y & (a < b ? 1 : 0); into
3372 x = (a < b ? y : 0);
3374 E.g. for:
3375 bool a_b, b_b, c_b;
3376 TYPE d_T;
3378 S1 a_b = x1 CMP1 y1;
3379 S2 b_b = x2 CMP2 y2;
3380 S3 c_b = a_b & b_b;
3381 S4 d_T = (TYPE) c_b;
3383 we would normally emit:
3385 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3386 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3387 S3' c_T = a_T & b_T;
3388 S4' d_T = c_T;
3390 but we can save one stmt by using the
3391 result of one of the COND_EXPRs in the other COND_EXPR and leave
3392 BIT_AND_EXPR stmt out:
3394 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3395 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3396 S4' f_T = c_T;
3398 At least when VEC_COND_EXPR is implemented using masks,
3399 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3400 computes the comparison mask and ANDs it, in one case with an
3401 all-ones vector, in the other case with a vector register.
3402 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3403 often more expensive. */
3404 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3405 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3406 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3408 irhs1 = *defs.get (rhs1);
3409 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3410 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3411 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3413 rhs_code = def_rhs_code;
3414 rhs1 = def_rhs1;
3415 rhs2 = gimple_assign_rhs2 (def_stmt);
3416 trueval = irhs1;
3417 goto do_compare;
3419 else
3420 irhs2 = *defs.get (rhs2);
3421 goto and_ior_xor;
3423 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3424 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3425 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3427 irhs2 = *defs.get (rhs2);
3428 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3429 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3430 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3432 rhs_code = def_rhs_code;
3433 rhs1 = def_rhs1;
3434 rhs2 = gimple_assign_rhs2 (def_stmt);
3435 trueval = irhs2;
3436 goto do_compare;
3438 else
3439 irhs1 = *defs.get (rhs1);
3440 goto and_ior_xor;
3442 /* FALLTHRU */
3443 case BIT_IOR_EXPR:
3444 case BIT_XOR_EXPR:
3445 irhs1 = *defs.get (rhs1);
3446 irhs2 = *defs.get (rhs2);
3447 and_ior_xor:
3448 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3449 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3451 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3452 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3453 int out_prec = TYPE_PRECISION (out_type);
3454 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3455 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3456 stmt_info);
3457 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3458 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3459 stmt_info);
3460 else
3462 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3463 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3466 itype = TREE_TYPE (irhs1);
3467 pattern_stmt
3468 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3469 rhs_code, irhs1, irhs2);
3470 break;
3472 default:
3473 do_compare:
3474 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3475 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3476 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3477 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3478 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3480 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3481 itype
3482 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3484 else
3485 itype = TREE_TYPE (rhs1);
3486 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3487 if (trueval == NULL_TREE)
3488 trueval = build_int_cst (itype, 1);
3489 else
3490 gcc_checking_assert (useless_type_conversion_p (itype,
3491 TREE_TYPE (trueval)));
3492 pattern_stmt
3493 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3494 COND_EXPR, cond_expr, trueval,
3495 build_int_cst (itype, 0));
3496 break;
3499 gimple_set_location (pattern_stmt, loc);
3500 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3501 pattern def seq stmts instead of just letting auto-detection do
3502 its work? */
3503 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3504 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3505 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3506 append_pattern_def_seq (stmt_info, pattern_stmt);
3507 defs.put (var, gimple_assign_lhs (pattern_stmt));
3510 /* Comparison function to qsort a vector of gimple stmts by UID. */
3512 static int
3513 sort_after_uid (const void *p1, const void *p2)
3515 const gimple *stmt1 = *(const gimple * const *)p1;
3516 const gimple *stmt2 = *(const gimple * const *)p2;
3517 return gimple_uid (stmt1) - gimple_uid (stmt2);
3520 /* Create pattern stmts for all stmts participating in the bool pattern
3521 specified by BOOL_STMT_SET and its root STMT with the desired type
3522 OUT_TYPE. Return the def of the pattern root. */
3524 static tree
3525 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3526 tree out_type, gimple *stmt)
3528 /* Gather original stmts in the bool pattern in their order of appearance
3529 in the IL. */
3530 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3531 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3532 i != bool_stmt_set.end (); ++i)
3533 bool_stmts.quick_push (*i);
3534 bool_stmts.qsort (sort_after_uid);
3536 /* Now process them in that order, producing pattern stmts. */
3537 hash_map <tree, tree> defs;
3538 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3539 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3540 out_type, vinfo_for_stmt (stmt), defs);
3542 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3543 gimple *pattern_stmt
3544 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3545 return gimple_assign_lhs (pattern_stmt);
3548 /* Helper for search_type_for_mask. */
3550 static tree
3551 search_type_for_mask_1 (tree var, vec_info *vinfo,
3552 hash_map<gimple *, tree> &cache)
3554 gimple *def_stmt;
3555 enum vect_def_type dt;
3556 tree rhs1;
3557 enum tree_code rhs_code;
3558 tree res = NULL_TREE, res2;
3560 if (TREE_CODE (var) != SSA_NAME)
3561 return NULL_TREE;
3563 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3564 return NULL_TREE;
3566 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3567 return NULL_TREE;
3569 if (dt != vect_internal_def)
3570 return NULL_TREE;
3572 if (!is_gimple_assign (def_stmt))
3573 return NULL_TREE;
3575 tree *c = cache.get (def_stmt);
3576 if (c)
3577 return *c;
3579 rhs_code = gimple_assign_rhs_code (def_stmt);
3580 rhs1 = gimple_assign_rhs1 (def_stmt);
3582 switch (rhs_code)
3584 case SSA_NAME:
3585 case BIT_NOT_EXPR:
3586 CASE_CONVERT:
3587 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3588 break;
3590 case BIT_AND_EXPR:
3591 case BIT_IOR_EXPR:
3592 case BIT_XOR_EXPR:
3593 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3594 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3595 cache);
3596 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3597 res = res2;
3598 break;
3600 default:
3601 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3603 tree comp_vectype, mask_type;
3605 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3607 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3608 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3609 vinfo, cache);
3610 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3611 res = res2;
3612 break;
3615 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3616 if (comp_vectype == NULL_TREE)
3618 res = NULL_TREE;
3619 break;
3622 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3623 if (!mask_type
3624 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3626 res = NULL_TREE;
3627 break;
3630 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3631 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3633 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3634 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3636 else
3637 res = TREE_TYPE (rhs1);
3641 cache.put (def_stmt, res);
3642 return res;
3645 /* Return the proper type for converting bool VAR into
3646 an integer value or NULL_TREE if no such type exists.
3647 The type is chosen so that the converted value has the
3648 same number of elements as VAR's vector type. */
3650 static tree
3651 search_type_for_mask (tree var, vec_info *vinfo)
3653 hash_map<gimple *, tree> cache;
3654 return search_type_for_mask_1 (var, vinfo, cache);
3657 /* Function vect_recog_bool_pattern
3659 Try to find pattern like following:
3661 bool a_b, b_b, c_b, d_b, e_b;
3662 TYPE f_T;
3663 loop:
3664 S1 a_b = x1 CMP1 y1;
3665 S2 b_b = x2 CMP2 y2;
3666 S3 c_b = a_b & b_b;
3667 S4 d_b = x3 CMP3 y3;
3668 S5 e_b = c_b | d_b;
3669 S6 f_T = (TYPE) e_b;
3671 where type 'TYPE' is an integral type. Or a similar pattern
3672 ending in
3674 S6 f_Y = e_b ? r_Y : s_Y;
3676 as results from if-conversion of a complex condition.
3678 Input:
3680 * LAST_STMT: A stmt at the end from which the pattern
3681 search begins, i.e. cast of a bool to
3682 an integer type.
3684 Output:
3686 * TYPE_IN: The type of the input arguments to the pattern.
3688 * TYPE_OUT: The type of the output of this pattern.
3690 * Return value: A new stmt that will be used to replace the pattern.
3692 Assuming the size of TYPE is the same as the size of all comparisons
3693 (otherwise some casts would be added where needed), for the above
3694 sequence we create the related pattern stmts:
3695 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3696 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3697 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3698 S5' e_T = c_T | d_T;
3699 S6' f_T = e_T;
3701 Instead of the above S3' we could emit:
3702 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3703 S3' c_T = a_T | b_T;
3704 but the above is more efficient. */
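/* Illustrative sketch (not part of GCC): the scalar shape of the rewrite for
   a reduced two-comparison variant (S1, S2, S3, S6 above), showing how the
   second COND_EXPR reuses the first result so a separate BIT_AND_EXPR is not
   needed.  The function name and the use of int are hypothetical.  */

static inline int
example_bool_and_pattern (int x1, int y1, int x2, int y2)
{
  int a_T = x1 < y1 ? 1 : 0;	/* S1' */
  int c_T = x2 < y2 ? a_T : 0;	/* S3': a_b & b_b folded into the COND_EXPR.  */
  return c_T;			/* S6' f_T = c_T.  */
}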
3706 static gimple *
3707 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3708 tree *type_out)
3710 gimple *last_stmt = stmts->pop ();
3711 enum tree_code rhs_code;
3712 tree var, lhs, rhs, vectype;
3713 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3714 stmt_vec_info new_stmt_info;
3715 vec_info *vinfo = stmt_vinfo->vinfo;
3716 gimple *pattern_stmt;
3718 if (!is_gimple_assign (last_stmt))
3719 return NULL;
3721 var = gimple_assign_rhs1 (last_stmt);
3722 lhs = gimple_assign_lhs (last_stmt);
3724 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3725 return NULL;
3727 hash_set<gimple *> bool_stmts;
3729 rhs_code = gimple_assign_rhs_code (last_stmt);
3730 if (CONVERT_EXPR_CODE_P (rhs_code))
3732 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3733 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3734 return NULL;
3735 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3736 if (vectype == NULL_TREE)
3737 return NULL;
3739 if (check_bool_pattern (var, vinfo, bool_stmts))
3741 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3742 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3743 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3744 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3745 else
3746 pattern_stmt
3747 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3749 else
3751 tree type = search_type_for_mask (var, vinfo);
3752 tree cst0, cst1, tmp;
3754 if (!type)
3755 return NULL;
3757 /* We may directly use a cond with a narrowed type to avoid
3758 multiple cond exprs followed by result packing, performing a
3759 single cond with a packed mask instead. In the widening case
3760 it is better to emit the cond first and then extract the
3761 results. */
3762 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3763 type = TREE_TYPE (lhs);
3765 cst0 = build_int_cst (type, 0);
3766 cst1 = build_int_cst (type, 1);
3767 tmp = vect_recog_temp_ssa_var (type, NULL);
3768 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3770 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3772 tree new_vectype = get_vectype_for_scalar_type (type);
3773 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3774 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3775 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3776 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3778 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3779 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3783 *type_out = vectype;
3784 *type_in = vectype;
3785 stmts->safe_push (last_stmt);
3786 if (dump_enabled_p ())
3787 dump_printf_loc (MSG_NOTE, vect_location,
3788 "vect_recog_bool_pattern: detected:\n");
3790 return pattern_stmt;
3792 else if (rhs_code == COND_EXPR
3793 && TREE_CODE (var) == SSA_NAME)
3795 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3796 if (vectype == NULL_TREE)
3797 return NULL;
3799 /* Build a scalar type for the boolean result that when
3800 vectorized matches the vector type of the result in
3801 size and number of elements. */
3802 unsigned prec
3803 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3804 TYPE_VECTOR_SUBPARTS (vectype));
3806 tree type
3807 = build_nonstandard_integer_type (prec,
3808 TYPE_UNSIGNED (TREE_TYPE (var)));
3809 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3810 return NULL;
3812 if (!check_bool_pattern (var, vinfo, bool_stmts))
3813 return NULL;
3815 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3817 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3818 pattern_stmt
3819 = gimple_build_assign (lhs, COND_EXPR,
3820 build2 (NE_EXPR, boolean_type_node,
3821 rhs, build_int_cst (type, 0)),
3822 gimple_assign_rhs2 (last_stmt),
3823 gimple_assign_rhs3 (last_stmt));
3824 *type_out = vectype;
3825 *type_in = vectype;
3826 stmts->safe_push (last_stmt);
3827 if (dump_enabled_p ())
3828 dump_printf_loc (MSG_NOTE, vect_location,
3829 "vect_recog_bool_pattern: detected:\n");
3831 return pattern_stmt;
3833 else if (rhs_code == SSA_NAME
3834 && STMT_VINFO_DATA_REF (stmt_vinfo))
3836 stmt_vec_info pattern_stmt_info;
3837 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3838 gcc_assert (vectype != NULL_TREE);
3839 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3840 return NULL;
3842 if (check_bool_pattern (var, vinfo, bool_stmts))
3843 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3844 else
3846 tree type = search_type_for_mask (var, vinfo);
3847 tree cst0, cst1, new_vectype;
3849 if (!type)
3850 return NULL;
3852 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3853 type = TREE_TYPE (vectype);
3855 cst0 = build_int_cst (type, 0);
3856 cst1 = build_int_cst (type, 1);
3857 new_vectype = get_vectype_for_scalar_type (type);
3859 rhs = vect_recog_temp_ssa_var (type, NULL);
3860 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3862 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3863 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3864 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3865 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3868 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3869 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3871 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3872 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3873 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3874 rhs = rhs2;
3876 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3877 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3878 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3879 STMT_VINFO_DATA_REF (pattern_stmt_info)
3880 = STMT_VINFO_DATA_REF (stmt_vinfo);
3881 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3882 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3883 *type_out = vectype;
3884 *type_in = vectype;
3885 stmts->safe_push (last_stmt);
3886 if (dump_enabled_p ())
3887 dump_printf_loc (MSG_NOTE, vect_location,
3888 "vect_recog_bool_pattern: detected:\n");
3889 return pattern_stmt;
3891 else
3892 return NULL;
3896 /* A helper for vect_recog_mask_conversion_pattern. Build
3897 conversion of MASK to a type suitable for masking VECTYPE.
3898 The built statement gets the required vectype and is appended
3899 to the pattern sequence of STMT_VINFO.
3901 Return the converted mask. */
3903 static tree
3904 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3905 vec_info *vinfo)
3907 gimple *stmt;
3908 tree masktype, tmp;
3909 stmt_vec_info new_stmt_info;
3911 masktype = build_same_sized_truth_vector_type (vectype);
3912 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3913 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3914 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3915 set_vinfo_for_stmt (stmt, new_stmt_info);
3916 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3917 append_pattern_def_seq (stmt_vinfo, stmt);
3919 return tmp;
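/* Editorial note (not part of the original source): in the example below,
   the statements

     S3''  m_2' = (_Bool[bitsize=32])m_2
     S4''  m_3'' = (_Bool[bitsize=8])m_3'

   are the kind of conversions that build_mask_conversion emits, giving
   each one the boolean vector type derived from the VECTYPE it has to
   mask.  */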
3923 /* Function vect_recog_mask_conversion_pattern
3925 Try to find statements which require boolean type
3926 conversion. Additional conversion statements are
3927 added to handle such cases. For example:
3929 bool m_1, m_2, m_3;
3930 int i_4, i_5;
3931 double d_6, d_7;
3932 char c_1, c_2, c_3;
3934 S1 m_1 = i_4 > i_5;
3935 S2 m_2 = d_6 < d_7;
3936 S3 m_3 = m_1 & m_2;
3937 S4 c_1 = m_3 ? c_2 : c_3;
3939 Will be transformed into:
3941 S1 m_1 = i_4 > i_5;
3942 S2 m_2 = d_6 < d_7;
3943 S3'' m_2' = (_Bool[bitsize=32])m_2
3944 S3' m_3' = m_1 & m_2';
3945 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3946 S4' c_1' = m_3'' ? c_2 : c_3; */
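/* Editorial illustration (not part of the original source): a C loop,
   with hypothetical names mirroring the sketch above, that leads to the
   mixed-width masks once if-converted (assuming 32-bit int and 64-bit
   double):

     void
     f (char *restrict c_1, int *i_4, int *i_5, double *d_6, double *d_7,
        char *c_2, char *c_3, int n)
     {
       for (int i = 0; i < n; i++)
         c_1[i] = (i_4[i] > i_5[i] && d_6[i] < d_7[i]) ? c_2[i] : c_3[i];
     }

   The int comparison and the double comparison produce masks for
   different element widths, so the pattern adds the S3''/S4''
   conversions before the & and the COND_EXPR.  */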
3948 static gimple *
3949 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3950 tree *type_out)
3952 gimple *last_stmt = stmts->pop ();
3953 enum tree_code rhs_code;
3954 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3955 tree vectype1, vectype2;
3956 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3957 stmt_vec_info pattern_stmt_info;
3958 vec_info *vinfo = stmt_vinfo->vinfo;
3960 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3961 if (is_gimple_call (last_stmt)
3962 && gimple_call_internal_p (last_stmt)
3963 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3964 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3966 gcall *pattern_stmt;
3967 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3969 if (load)
3971 lhs = gimple_call_lhs (last_stmt);
3972 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3974 else
3976 rhs2 = gimple_call_arg (last_stmt, 3);
3977 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3980 rhs1 = gimple_call_arg (last_stmt, 2);
3981 rhs1_type = search_type_for_mask (rhs1, vinfo);
3982 if (!rhs1_type)
3983 return NULL;
3984 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3986 if (!vectype1 || !vectype2
3987 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3988 TYPE_VECTOR_SUBPARTS (vectype2)))
3989 return NULL;
3991 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3993 if (load)
3995 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3996 pattern_stmt
3997 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3998 gimple_call_arg (last_stmt, 0),
3999 gimple_call_arg (last_stmt, 1),
4000 tmp);
4001 gimple_call_set_lhs (pattern_stmt, lhs);
4003 else
4004 pattern_stmt
4005 = gimple_build_call_internal (IFN_MASK_STORE, 4,
4006 gimple_call_arg (last_stmt, 0),
4007 gimple_call_arg (last_stmt, 1),
4008 tmp,
4009 gimple_call_arg (last_stmt, 3));
4011 gimple_call_set_nothrow (pattern_stmt, true);
4013 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4014 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4015 STMT_VINFO_DATA_REF (pattern_stmt_info)
4016 = STMT_VINFO_DATA_REF (stmt_vinfo);
4017 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4018 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4020 *type_out = vectype1;
4021 *type_in = vectype1;
4022 stmts->safe_push (last_stmt);
4023 if (dump_enabled_p ())
4024 dump_printf_loc (MSG_NOTE, vect_location,
4025 "vect_recog_mask_conversion_pattern: detected:\n");
4027 return pattern_stmt;
4030 if (!is_gimple_assign (last_stmt))
4031 return NULL;
4033 gimple *pattern_stmt;
4034 lhs = gimple_assign_lhs (last_stmt);
4035 rhs1 = gimple_assign_rhs1 (last_stmt);
4036 rhs_code = gimple_assign_rhs_code (last_stmt);
4038 /* Check for cond expression requiring mask conversion. */
4039 if (rhs_code == COND_EXPR)
4041 /* vect_recog_mixed_size_cond_pattern could apply.
4042 Do nothing then. */
4043 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4044 return NULL;
4046 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4048 if (TREE_CODE (rhs1) == SSA_NAME)
4050 rhs1_type = search_type_for_mask (rhs1, vinfo);
4051 if (!rhs1_type)
4052 return NULL;
4054 else if (COMPARISON_CLASS_P (rhs1))
4056 /* Check whether we're comparing scalar booleans and (if so)
4057 whether a better mask type exists than the mask associated
4058 with boolean-sized elements. This avoids unnecessary packs
4059 and unpacks if the booleans are set from comparisons of
4060 wider types. E.g. in:
4062 int x1, x2, x3, x4, y1, y2;
4064 bool b1 = (x1 == x2);
4065 bool b2 = (x3 == x4);
4066 ... = b1 == b2 ? y1 : y2;
4068 it is better for b1 and b2 to use the mask type associated
4069 with int elements rather than bool (byte) elements. */
4070 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4071 if (!rhs1_type)
4072 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4074 else
4075 return NULL;
4077 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4079 if (!vectype1 || !vectype2)
4080 return NULL;
4082 /* Continue if a conversion is needed. Also continue if we have
4083 a comparison whose vector type would normally be different from
4084 VECTYPE2 when considered in isolation. In that case we'll
4085 replace the comparison with an SSA name (so that we can record
4086 its vector type) and behave as though the comparison was an SSA
4087 name from the outset. */
4088 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4089 TYPE_VECTOR_SUBPARTS (vectype2))
4090 && (TREE_CODE (rhs1) == SSA_NAME
4091 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4092 return NULL;
4094 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4095 in place; vectorizable_condition can handle it there. This avoids
4096 unnecessary promotion stmts and an increased vectorization factor. */
4097 if (COMPARISON_CLASS_P (rhs1)
4098 && INTEGRAL_TYPE_P (rhs1_type)
4099 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4100 TYPE_VECTOR_SUBPARTS (vectype2)))
4102 gimple *dummy;
4103 enum vect_def_type dt;
4104 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4105 &dummy, &dt)
4106 && dt == vect_external_def
4107 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4108 &dummy, &dt)
4109 && (dt == vect_external_def
4110 || dt == vect_constant_def))
4112 tree wide_scalar_type = build_nonstandard_integer_type
4113 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4114 TYPE_UNSIGNED (rhs1_type));
4115 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4116 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4117 return NULL;
4121 /* If rhs1 is a comparison we need to move it into a
4122 separate statement. */
4123 if (TREE_CODE (rhs1) != SSA_NAME)
4125 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4126 pattern_stmt = gimple_build_assign (tmp, rhs1);
4127 rhs1 = tmp;
4129 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4130 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4131 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4132 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4135 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4136 TYPE_VECTOR_SUBPARTS (vectype2)))
4137 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4138 else
4139 tmp = rhs1;
4141 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4142 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4143 gimple_assign_rhs2 (last_stmt),
4144 gimple_assign_rhs3 (last_stmt));
4146 *type_out = vectype1;
4147 *type_in = vectype1;
4148 stmts->safe_push (last_stmt);
4149 if (dump_enabled_p ())
4150 dump_printf_loc (MSG_NOTE, vect_location,
4151 "vect_recog_mask_conversion_pattern: detected:\n");
4153 return pattern_stmt;
4156 /* Now check for binary boolean operations requiring conversion for
4157 one of the operands. */
4158 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4159 return NULL;
4161 if (rhs_code != BIT_IOR_EXPR
4162 && rhs_code != BIT_XOR_EXPR
4163 && rhs_code != BIT_AND_EXPR
4164 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4165 return NULL;
4167 rhs2 = gimple_assign_rhs2 (last_stmt);
4169 rhs1_type = search_type_for_mask (rhs1, vinfo);
4170 rhs2_type = search_type_for_mask (rhs2, vinfo);
4172 if (!rhs1_type || !rhs2_type
4173 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4174 return NULL;
4176 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4178 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4179 if (!vectype1)
4180 return NULL;
4181 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4183 else
4185 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4186 if (!vectype1)
4187 return NULL;
4188 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4191 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4192 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4194 *type_out = vectype1;
4195 *type_in = vectype1;
4196 stmts->safe_push (last_stmt);
4197 if (dump_enabled_p ())
4198 dump_printf_loc (MSG_NOTE, vect_location,
4199 "vect_recog_mask_conversion_pattern: detected:\n");
4201 return pattern_stmt;
4204 /* STMT is a load or store. If the load or store is conditional, return
4205 the boolean condition under which it occurs, otherwise return null. */
4207 static tree
4208 vect_get_load_store_mask (gimple *stmt)
4210 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4212 gcc_assert (gimple_assign_single_p (def_assign));
4213 return NULL_TREE;
4216 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4218 internal_fn ifn = gimple_call_internal_fn (def_call);
4219 int mask_index = internal_fn_mask_index (ifn);
4220 return gimple_call_arg (def_call, mask_index);
4223 gcc_unreachable ();
4226 /* Return the scalar offset type that an internal gather/scatter function
4227 should use. GS_INFO describes the gather/scatter operation. */
4229 static tree
4230 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4232 tree offset_type = TREE_TYPE (gs_info->offset);
4233 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4235 /* Enforced by vect_check_gather_scatter. */
4236 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4237 gcc_assert (element_bits >= offset_bits);
4239 /* If the offset is narrower than the elements, extend it according
4240 to its sign. */
4241 if (element_bits > offset_bits)
4242 return build_nonstandard_integer_type (element_bits,
4243 TYPE_UNSIGNED (offset_type));
4245 return offset_type;
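/* Editorial note (not part of the original source): as a worked example
   of the rule above, a gather with 64-bit elements and a 32-bit unsigned
   offset gets a 64-bit unsigned offset type, while an offset that is
   already as wide as the elements is returned unchanged.  */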
4248 /* Return MASK if MASK is suitable for masking an operation on vectors
4249 of type VECTYPE, otherwise convert it into such a form and return
4250 the result. Associate any conversion statements with STMT_INFO's
4251 pattern. */
4253 static tree
4254 vect_convert_mask_for_vectype (tree mask, tree vectype,
4255 stmt_vec_info stmt_info, vec_info *vinfo)
4257 tree mask_type = search_type_for_mask (mask, vinfo);
4258 if (mask_type)
4260 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4261 if (mask_vectype
4262 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4263 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4264 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4266 return mask;
4269 /* Return the equivalent of:
4271 fold_convert (TYPE, VALUE)
4273 with the expectation that the operation will be vectorized.
4274 If new statements are needed, add them as pattern statements
4275 to STMT_INFO. */
4277 static tree
4278 vect_add_conversion_to_patterm (tree type, tree value,
4279 stmt_vec_info stmt_info,
4280 vec_info *vinfo)
4282 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4283 return value;
4285 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4286 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4287 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4288 set_vinfo_for_stmt (conversion, new_stmt_info);
4289 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4290 append_pattern_def_seq (stmt_info, conversion);
4291 return new_value;
4294 /* Try to convert STMT into a call to a gather load or scatter store
4295 internal function. Return the final statement on success and set
4296 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4298 This function only handles gathers and scatters that were recognized
4299 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
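/* Editorial illustration (not part of the original source): a C loop with
   an indexed (gather) load of the kind this function targets, assuming
   the analysis marked the access STMT_VINFO_GATHER_SCATTER_P and the
   target provides the corresponding internal function:

     void
     f (double *restrict out, double *base, int *idx, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = base[idx[i]];
     }
   */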
4301 static gimple *
4302 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4303 tree *type_in, tree *type_out)
4305 /* Currently we only support this for loop vectorization. */
4306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4307 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4308 if (!loop_vinfo)
4309 return NULL;
4311 /* Make sure that we're looking at a gather load or scatter store. */
4312 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4313 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4314 return NULL;
4316 /* Get the boolean that controls whether the load or store happens.
4317 This is null if the operation is unconditional. */
4318 tree mask = vect_get_load_store_mask (stmt);
4320 /* Make sure that the target supports an appropriate internal
4321 function for the gather/scatter operation. */
4322 gather_scatter_info gs_info;
4323 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4324 || gs_info.decl)
4325 return NULL;
4327 /* Convert the mask to the right form. */
4328 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4329 if (mask)
4330 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4331 loop_vinfo);
4333 /* Get the invariant base and non-invariant offset, converting the
4334 latter to the same width as the vector elements. */
4335 tree base = gs_info.base;
4336 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4337 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4338 last_stmt_info, loop_vinfo);
4340 /* Build the new pattern statement. */
4341 tree scale = size_int (gs_info.scale);
4342 gcall *pattern_stmt;
4343 if (DR_IS_READ (dr))
4345 if (mask != NULL)
4346 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4347 offset, scale, mask);
4348 else
4349 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4350 offset, scale);
4351 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4352 gimple_call_set_lhs (pattern_stmt, load_lhs);
4354 else
4356 tree rhs = vect_get_store_rhs (stmt);
4357 if (mask != NULL)
4358 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4359 base, offset, scale, rhs,
4360 mask);
4361 else
4362 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4363 base, offset, scale, rhs);
4365 gimple_call_set_nothrow (pattern_stmt, true);
4367 /* Copy across relevant vectorization info and associate DR with the
4368 new pattern statement instead of the original statement. */
4369 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4370 loop_vinfo);
4371 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4372 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4373 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4374 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4375 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4376 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4378 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4379 *type_out = vectype;
4380 *type_in = vectype;
4382 if (dump_enabled_p ())
4383 dump_printf_loc (MSG_NOTE, vect_location,
4384 "gather/scatter pattern detected:\n");
4386 return pattern_stmt;
4389 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4391 static gimple *
4392 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4393 tree *type_out)
4395 gimple *last_stmt = stmts->pop ();
4396 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4397 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4398 last_stmt_info,
4399 type_in, type_out);
4400 if (pattern_stmt)
4401 stmts->safe_push (last_stmt);
4402 return pattern_stmt;
4405 /* Mark statements that are involved in a pattern. */
4407 static inline void
4408 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4409 tree pattern_vectype)
4411 stmt_vec_info pattern_stmt_info, def_stmt_info;
4412 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4413 vec_info *vinfo = orig_stmt_info->vinfo;
4414 gimple *def_stmt;
4416 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4417 if (pattern_stmt_info == NULL)
4419 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4420 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4422 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4424 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4425 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4426 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4427 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4428 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4429 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4430 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4431 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4432 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4434 gimple_stmt_iterator si;
4435 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4436 !gsi_end_p (si); gsi_next (&si))
4438 def_stmt = gsi_stmt (si);
4439 def_stmt_info = vinfo_for_stmt (def_stmt);
4440 if (def_stmt_info == NULL)
4442 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4443 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4445 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4446 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4447 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4448 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4449 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4454 /* Function vect_pattern_recog_1
4456 Input:
4457 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4458 computation pattern.
4459 STMT: A stmt from which the pattern search should start.
4461 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4462 expression that computes the same functionality and can be used to
4463 replace the sequence of stmts that are involved in the pattern.
4465 Output:
4466 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4467 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4468 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4469 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4470 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4471 the available target pattern.
4473 This function also does some bookkeeping, as explained in the documentation
4474 for vect_pattern_recog. */
4476 static bool
4477 vect_pattern_recog_1 (vect_recog_func *recog_func,
4478 gimple_stmt_iterator si,
4479 vec<gimple *> *stmts_to_replace)
4481 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4482 stmt_vec_info stmt_info;
4483 loop_vec_info loop_vinfo;
4484 tree pattern_vectype;
4485 tree type_in, type_out;
4486 enum tree_code code;
4487 int i;
4489 stmts_to_replace->truncate (0);
4490 stmts_to_replace->quick_push (stmt);
4491 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4492 if (!pattern_stmt)
4493 return false;
4495 stmt = stmts_to_replace->last ();
4496 stmt_info = vinfo_for_stmt (stmt);
4497 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4499 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4500 || VECTOR_TYPE_P (type_in))
4502 /* No need to check target support (already checked by the pattern
4503 recognition function). */
4504 pattern_vectype = type_out ? type_out : type_in;
4506 else
4508 /* Check target support */
4509 type_in = get_vectype_for_scalar_type (type_in);
4510 if (!type_in)
4511 return false;
4512 if (type_out)
4513 type_out = get_vectype_for_scalar_type (type_out);
4514 else
4515 type_out = type_in;
4516 if (!type_out)
4517 return false;
4518 pattern_vectype = type_out;
4520 if (is_gimple_assign (pattern_stmt))
4522 enum insn_code icode;
4523 code = gimple_assign_rhs_code (pattern_stmt);
4524 optab optab = optab_for_tree_code (code, type_in, optab_default);
4525 machine_mode vec_mode = TYPE_MODE (type_in);
4526 if (!optab
4527 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4528 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4529 return false;
4531 else
4532 gcc_assert (is_gimple_call (pattern_stmt));
4535 /* Found a vectorizable pattern. */
4536 if (dump_enabled_p ())
4538 dump_printf_loc (MSG_NOTE, vect_location,
4539 "%s pattern recognized: ", recog_func->name);
4540 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4543 /* Mark the stmts that are involved in the pattern. */
4544 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4546 /* Patterns cannot be vectorized using SLP, because they change the order of
4547 computation. */
4548 if (loop_vinfo)
4550 unsigned ix, ix2;
4551 gimple **elem_ptr;
4552 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4553 elem_ptr, *elem_ptr == stmt);
4556 /* It is possible that additional pattern stmts are created and inserted in
4557 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4558 relevant statements. */
4559 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4560 && (unsigned) i < (stmts_to_replace->length () - 1);
4561 i++)
4563 stmt_info = vinfo_for_stmt (stmt);
4564 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4565 if (dump_enabled_p ())
4567 dump_printf_loc (MSG_NOTE, vect_location,
4568 "additional pattern stmt: ");
4569 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4572 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4575 return true;
4579 /* Function vect_pattern_recog
4581 Input:
4582 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
4583 computation idioms.
4585 Output - for each computation idiom that is detected we create a new stmt
4586 that provides the same functionality and that can be vectorized. We
4587 also record some information in the stmt_vec_info of the relevant
4588 stmts, as explained below:
4590 At the entry to this function we have the following stmts, with the
4591 following initial value in the STMT_VINFO fields:
4593 stmt in_pattern_p related_stmt vec_stmt
4594 S1: a_i = .... - - -
4595 S2: a_2 = ..use(a_i).. - - -
4596 S3: a_1 = ..use(a_2).. - - -
4597 S4: a_0 = ..use(a_1).. - - -
4598 S5: ... = ..use(a_0).. - - -
4600 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4601 represented by a single stmt. We then:
4602 - create a new stmt S6 equivalent to the pattern (the stmt is not
4603 inserted into the code)
4604 - fill in the STMT_VINFO fields as follows:
4606 in_pattern_p related_stmt vec_stmt
4607 S1: a_i = .... - - -
4608 S2: a_2 = ..use(a_i).. - - -
4609 S3: a_1 = ..use(a_2).. - - -
4610 S4: a_0 = ..use(a_1).. true S6 -
4611 '---> S6: a_new = .... - S4 -
4612 S5: ... = ..use(a_0).. - - -
4614 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4615 to each other through the RELATED_STMT field).
4617 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4618 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4619 remain irrelevant unless used by stmts other than S4.
4621 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4622 (because they are marked as irrelevant). It will vectorize S6, and record
4623 a pointer to the new vector stmt VS6 from S6 (as usual).
4624 S4 will be skipped, and S5 will be vectorized as usual:
4626 in_pattern_p related_stmt vec_stmt
4627 S1: a_i = .... - - -
4628 S2: a_2 = ..use(a_i).. - - -
4629 S3: a_1 = ..use(a_2).. - - -
4630 > VS6: va_new = .... - - -
4631 S4: a_0 = ..use(a_1).. true S6 VS6
4632 '---> S6: a_new = .... - S4 VS6
4633 > VS5: ... = ..vuse(va_new).. - - -
4634 S5: ... = ..use(a_0).. - - -
4636 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4637 elsewhere), and we'll end up with:
4639 VS6: va_new = ....
4640 VS5: ... = ..vuse(va_new)..
4642 In case of more than one pattern statement, e.g., widen-mult with
4643 intermediate type:
4645 S1 a_t = ;
4646 S2 a_T = (TYPE) a_t;
4647 '--> S3: a_it = (interm_type) a_t;
4648 S4 prod_T = a_T * CONST;
4649 '--> S5: prod_T' = a_it w* CONST;
4651 there may be other users of a_T outside the pattern. In that case S2 will
4652 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4653 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4654 be recorded in S3. */
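/* Editorial illustration (not part of the original source): a C loop that
   gives rise to the widen-mult-with-intermediate-type case above,
   assuming 16-bit short and 64-bit long long:

     void
     f (long long *restrict prod_T, short *a_t, int n)
     {
       for (int i = 0; i < n; i++)
         prod_T[i] = (long long) a_t[i] * 123;
     }

   Here the load of a_t[i] is S1, the widening cast is S2 and the
   multiplication is S4; when the target has a widening multiply on an
   intermediate type, the recognizer introduces the S3/S5 statements
   shown above.  */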
4656 void
4657 vect_pattern_recog (vec_info *vinfo)
4659 struct loop *loop;
4660 basic_block *bbs;
4661 unsigned int nbbs;
4662 gimple_stmt_iterator si;
4663 unsigned int i, j;
4664 auto_vec<gimple *, 1> stmts_to_replace;
4665 gimple *stmt;
4667 if (dump_enabled_p ())
4668 dump_printf_loc (MSG_NOTE, vect_location,
4669 "=== vect_pattern_recog ===\n");
4671 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4673 loop = LOOP_VINFO_LOOP (loop_vinfo);
4674 bbs = LOOP_VINFO_BBS (loop_vinfo);
4675 nbbs = loop->num_nodes;
4677 /* Scan through the loop stmts, applying the pattern recognition
4678 functions starting at each stmt visited: */
4679 for (i = 0; i < nbbs; i++)
4681 basic_block bb = bbs[i];
4682 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4684 /* Scan over all generic vect_recog_xxx_pattern functions. */
4685 for (j = 0; j < NUM_PATTERNS; j++)
4686 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4687 &stmts_to_replace))
4688 break;
4692 else
4694 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4695 for (si = bb_vinfo->region_begin;
4696 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4698 if ((stmt = gsi_stmt (si))
4699 && vinfo_for_stmt (stmt)
4700 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4701 continue;
4703 /* Scan over all generic vect_recog_xxx_pattern functions. */
4704 for (j = 0; j < NUM_PATTERNS; j++)
4705 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4706 &stmts_to_replace))
4707 break;