1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Pattern recognition functions */
51 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
52 tree *);
53 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
54 tree *);
55 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
58 tree *);
59 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
61 tree *);
62 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
63 tree *, tree *);
64 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
65 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
66 tree *, tree *);
67 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
68 tree *, tree *);
70 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
71 tree *, tree *);
73 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
74 tree *, tree *);
75 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
76 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
77 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
78 tree *);
80 struct vect_recog_func
82 vect_recog_func_ptr fn;
83 const char *name;
86 /* Note that ordering matters - the first pattern matching on a stmt
87 is taken which means usually the more complex one needs to precede
88 the less complex ones (widen_sum only after dot_prod or sad for example). */
89 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
90 { vect_recog_widen_mult_pattern, "widen_mult" },
91 { vect_recog_dot_prod_pattern, "dot_prod" },
92 { vect_recog_sad_pattern, "sad" },
93 { vect_recog_widen_sum_pattern, "widen_sum" },
94 { vect_recog_pow_pattern, "pow" },
95 { vect_recog_widen_shift_pattern, "widen_shift" },
96 { vect_recog_over_widening_pattern, "over_widening" },
97 { vect_recog_rotate_pattern, "rotate" },
98 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
99 { vect_recog_divmod_pattern, "divmod" },
100 { vect_recog_mult_pattern, "mult" },
101 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
102 { vect_recog_bool_pattern, "bool" },
103 /* This must come before mask conversion, and includes the parts
104 of mask conversion that are needed for gather and scatter
105 internal functions. */
106 { vect_recog_gather_scatter_pattern, "gather_scatter" },
107 { vect_recog_mask_conversion_pattern, "mask_conversion" }
110 static inline void
111 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
113 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
114 stmt);
117 static inline void
118 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
120 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
121 append_pattern_def_seq (stmt_info, stmt);
124 /* Check whether STMT2 is in the same loop or basic block as STMT1.
125 Which of the two applies depends on whether we're currently doing
126 loop-based or basic-block-based vectorization, as determined by
127 the vinfo_for_stmt for STMT1 (which must be defined).
129 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
130 to be defined as well. */
132 static bool
133 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
135 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
136 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
139 /* If the LHS of DEF_STMT has a single use, and that statement is
140 in the same loop or basic block, return it. */
142 static gimple *
143 vect_single_imm_use (gimple *def_stmt)
145 tree lhs = gimple_assign_lhs (def_stmt);
146 use_operand_p use_p;
147 gimple *use_stmt;
149 if (!single_imm_use (lhs, &use_p, &use_stmt))
150 return NULL;
152 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
153 return NULL;
155 return use_stmt;
158 /* Check whether NAME, an ssa-name used in USE_STMT,
159 is a result of a type promotion, such that:
160 DEF_STMT: NAME = NOP (name0)
161 If CHECK_SIGN is TRUE, check that either both types are signed or both are
162 unsigned. */
164 static bool
165 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
166 tree *orig_type, gimple **def_stmt, bool *promotion)
168 gimple *dummy_gimple;
169 stmt_vec_info stmt_vinfo;
170 tree type = TREE_TYPE (name);
171 tree oprnd0;
172 enum vect_def_type dt;
174 stmt_vinfo = vinfo_for_stmt (use_stmt);
175 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
176 return false;
178 if (dt != vect_internal_def
179 && dt != vect_external_def && dt != vect_constant_def)
180 return false;
182 if (!*def_stmt)
183 return false;
185 if (dt == vect_internal_def)
187 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
188 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
189 return false;
192 if (!is_gimple_assign (*def_stmt))
193 return false;
195 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
196 return false;
198 oprnd0 = gimple_assign_rhs1 (*def_stmt);
200 *orig_type = TREE_TYPE (oprnd0);
201 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
202 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
203 return false;
205 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
206 *promotion = true;
207 else
208 *promotion = false;
210 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
211 return false;
213 return true;
216 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
217 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
219 static tree
220 vect_recog_temp_ssa_var (tree type, gimple *stmt)
222 return make_temp_ssa_name (type, stmt, "patt");
225 /* Return true if STMT_VINFO describes a reduction for which reassociation
226 is allowed. If STMT_INFO is part of a group, assume that it's part of
227 a reduction chain and optimistically assume that all statements
228 except the last allow reassociation. */
230 static bool
231 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
233 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
234 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
235 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
238 /* Function vect_recog_dot_prod_pattern
240 Try to find the following pattern:
242 type x_t, y_t;
243 TYPE1 prod;
244 TYPE2 sum = init;
245 loop:
246 sum_0 = phi <init, sum_1>
247 S1 x_t = ...
248 S2 y_t = ...
249 S3 x_T = (TYPE1) x_t;
250 S4 y_T = (TYPE1) y_t;
251 S5 prod = x_T * y_T;
252 [S6 prod = (TYPE2) prod; #optional]
253 S7 sum_1 = prod + sum_0;
255 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
256 same size as 'TYPE1' or bigger. This is a special case of a reduction
257 computation.
259 Input:
261 * STMTS: Contains a stmt from which the pattern search begins. In the
262 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
263 will be detected.
265 Output:
267 * TYPE_IN: The type of the input arguments to the pattern.
269 * TYPE_OUT: The type of the output of this pattern.
271 * Return value: A new stmt that will be used to replace the sequence of
272 stmts that constitute the pattern. In this case it will be:
273 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
275 Note: The dot-prod idiom is a widening reduction pattern that is
276 vectorized without preserving all the intermediate results. It
277 produces only N/2 (widened) results (by summing up pairs of
278 intermediate results) rather than all N results. Therefore, we
279 cannot allow this pattern when we want to get all the results and in
280 the correct order (as is the case when this computation is in an
281 inner-loop nested in an outer-loop that is being vectorized). */
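/* For illustration, one plausible source-level loop of the kind this pattern
   is meant to catch (a hypothetical sketch, not taken from the GCC testsuite;
   names are made up):

     unsigned short x[256], y[256];
     unsigned int dot = 0;
     for (int i = 0; i < 256; i++)
       dot += (unsigned int) x[i] * y[i];

   The loads of x[i] and y[i] correspond to S1/S2, the promotions to
   unsigned int to S3/S4, the product to S5 and the accumulation into DOT
   to S7, with TYPE1 = TYPE2 = unsigned int being exactly double the size
   of the unsigned short inputs.  */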
283 static gimple *
284 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
285 tree *type_out)
287 gimple *stmt, *last_stmt = (*stmts)[0];
288 tree oprnd0, oprnd1;
289 tree oprnd00, oprnd01;
290 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
291 tree type, half_type;
292 gimple *pattern_stmt;
293 tree prod_type;
294 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
295 struct loop *loop;
296 tree var;
297 bool promotion;
299 if (!loop_info)
300 return NULL;
302 loop = LOOP_VINFO_LOOP (loop_info);
304 /* We don't allow changing the order of the computation in the inner-loop
305 when doing outer-loop vectorization. */
306 if (loop && nested_in_vect_loop_p (loop, last_stmt))
307 return NULL;
309 if (!is_gimple_assign (last_stmt))
310 return NULL;
312 type = gimple_expr_type (last_stmt);
314 /* Look for the following pattern
315 DX = (TYPE1) X;
316 DY = (TYPE1) Y;
317 DPROD = DX * DY;
318 DDPROD = (TYPE2) DPROD;
319 sum_1 = DDPROD + sum_0;
320 In which
321 - DX is double the size of X
322 - DY is double the size of Y
323 - DX, DY, DPROD all have the same type
324 - sum is the same size as DPROD or bigger
325 - sum has been recognized as a reduction variable.
327 This is equivalent to:
328 DPROD = X w* Y; #widen mult
329 sum_1 = DPROD w+ sum_0; #widen summation
331 DPROD = X w* Y; #widen mult
332 sum_1 = DPROD + sum_0; #summation
335 /* Starting from LAST_STMT, follow the defs of its uses in search
336 of the above pattern. */
338 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
339 return NULL;
341 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
343 /* Has been detected as widening-summation? */
345 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
346 type = gimple_expr_type (stmt);
347 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
348 return NULL;
349 oprnd0 = gimple_assign_rhs1 (stmt);
350 oprnd1 = gimple_assign_rhs2 (stmt);
351 half_type = TREE_TYPE (oprnd0);
353 else
355 gimple *def_stmt;
357 if (!vect_reassociating_reduction_p (stmt_vinfo))
358 return NULL;
359 oprnd0 = gimple_assign_rhs1 (last_stmt);
360 oprnd1 = gimple_assign_rhs2 (last_stmt);
361 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
362 || !types_compatible_p (TREE_TYPE (oprnd1), type))
363 return NULL;
364 stmt = last_stmt;
366 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
367 &promotion)
368 && promotion)
370 stmt = def_stmt;
371 oprnd0 = gimple_assign_rhs1 (stmt);
373 else
374 half_type = type;
377 /* So far so good. Since last_stmt was detected as a (summation) reduction,
378 we know that oprnd1 is the reduction variable (defined by a loop-header
379 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
380 Left to check that oprnd0 is defined by a (widen_)mult_expr */
381 if (TREE_CODE (oprnd0) != SSA_NAME)
382 return NULL;
384 prod_type = half_type;
385 stmt = SSA_NAME_DEF_STMT (oprnd0);
387 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
388 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
389 return NULL;
391 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
392 inside the loop (in case we are analyzing an outer-loop). */
393 if (!is_gimple_assign (stmt))
394 return NULL;
395 stmt_vinfo = vinfo_for_stmt (stmt);
396 gcc_assert (stmt_vinfo);
397 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
398 return NULL;
399 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
400 return NULL;
401 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
403 /* Has been detected as a widening multiplication? */
405 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
406 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
407 return NULL;
408 stmt_vinfo = vinfo_for_stmt (stmt);
409 gcc_assert (stmt_vinfo);
410 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
411 oprnd00 = gimple_assign_rhs1 (stmt);
412 oprnd01 = gimple_assign_rhs2 (stmt);
413 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
414 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
416 else
418 tree half_type0, half_type1;
419 gimple *def_stmt;
420 tree oprnd0, oprnd1;
422 oprnd0 = gimple_assign_rhs1 (stmt);
423 oprnd1 = gimple_assign_rhs2 (stmt);
424 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
425 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
426 return NULL;
427 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
428 &promotion)
429 || !promotion)
430 return NULL;
431 oprnd00 = gimple_assign_rhs1 (def_stmt);
432 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
433 &promotion)
434 || !promotion)
435 return NULL;
436 oprnd01 = gimple_assign_rhs1 (def_stmt);
437 if (!types_compatible_p (half_type0, half_type1))
438 return NULL;
439 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
440 return NULL;
443 half_type = TREE_TYPE (oprnd00);
444 *type_in = half_type;
445 *type_out = type;
447 /* Pattern detected. Create a stmt to be used to replace the pattern: */
448 var = vect_recog_temp_ssa_var (type, NULL);
449 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
450 oprnd00, oprnd01, oprnd1);
452 if (dump_enabled_p ())
454 dump_printf_loc (MSG_NOTE, vect_location,
455 "vect_recog_dot_prod_pattern: detected: ");
456 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
459 return pattern_stmt;
463 /* Function vect_recog_sad_pattern
465 Try to find the following Sum of Absolute Difference (SAD) pattern:
467 type x_t, y_t;
468 signed TYPE1 diff, abs_diff;
469 TYPE2 sum = init;
470 loop:
471 sum_0 = phi <init, sum_1>
472 S1 x_t = ...
473 S2 y_t = ...
474 S3 x_T = (TYPE1) x_t;
475 S4 y_T = (TYPE1) y_t;
476 S5 diff = x_T - y_T;
477 S6 abs_diff = ABS_EXPR <diff>;
478 [S7 abs_diff = (TYPE2) abs_diff; #optional]
479 S8 sum_1 = abs_diff + sum_0;
481 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
482 same size as 'TYPE1' or bigger. This is a special case of a reduction
483 computation.
485 Input:
487 * STMTS: Contains a stmt from which the pattern search begins. In the
488 example, when this function is called with S8, the pattern
489 {S3,S4,S5,S6,S7,S8} will be detected.
491 Output:
493 * TYPE_IN: The type of the input arguments to the pattern.
495 * TYPE_OUT: The type of the output of this pattern.
497 * Return value: A new stmt that will be used to replace the sequence of
498 stmts that constitute the pattern. In this case it will be:
499 SAD_EXPR <x_t, y_t, sum_0>
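/* For illustration, one plausible source-level loop of the kind this pattern
   targets (a hypothetical sketch, not taken from the GCC testsuite; names
   are made up):

     unsigned char x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += __builtin_abs (x[i] - y[i]);

   The subtraction is done in int after the usual promotions (S3-S5), the
   __builtin_abs call provides the ABS_EXPR of S6, and the result is
   accumulated into SUM (S8), giving TYPE1 = TYPE2 = int.  */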
502 static gimple *
503 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
504 tree *type_out)
506 gimple *last_stmt = (*stmts)[0];
507 tree sad_oprnd0, sad_oprnd1;
508 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
509 tree half_type;
510 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
511 struct loop *loop;
512 bool promotion;
514 if (!loop_info)
515 return NULL;
517 loop = LOOP_VINFO_LOOP (loop_info);
519 /* We don't allow changing the order of the computation in the inner-loop
520 when doing outer-loop vectorization. */
521 if (loop && nested_in_vect_loop_p (loop, last_stmt))
522 return NULL;
524 if (!is_gimple_assign (last_stmt))
525 return NULL;
527 tree sum_type = gimple_expr_type (last_stmt);
529 /* Look for the following pattern
530 DX = (TYPE1) X;
531 DY = (TYPE1) Y;
532 DDIFF = DX - DY;
533 DAD = ABS_EXPR <DDIFF>;
534 DAD = (TYPE2) DAD;
535 sum_1 = DAD + sum_0;
536 In which
537 - DX is at least double the size of X
538 - DY is at least double the size of Y
539 - DX, DY, DDIFF, DAD all have the same type
540 - sum is the same size as DAD or bigger
541 - sum has been recognized as a reduction variable.
543 This is equivalent to:
544 DDIFF = X w- Y; #widen sub
545 DAD = ABS_EXPR <DDIFF>;
546 sum_1 = DAD w+ sum_0; #widen summation
548 DDIFF = X w- Y; #widen sub
549 DAD = ABS_EXPR <DDIFF>;
550 sum_1 = DAD + sum_0; #summation
553 /* Starting from LAST_STMT, follow the defs of its uses in search
554 of the above pattern. */
556 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
557 return NULL;
559 tree plus_oprnd0, plus_oprnd1;
561 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
563 /* Has been detected as widening-summation? */
565 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
566 sum_type = gimple_expr_type (stmt);
567 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
568 return NULL;
569 plus_oprnd0 = gimple_assign_rhs1 (stmt);
570 plus_oprnd1 = gimple_assign_rhs2 (stmt);
571 half_type = TREE_TYPE (plus_oprnd0);
573 else
575 gimple *def_stmt;
577 if (!vect_reassociating_reduction_p (stmt_vinfo))
578 return NULL;
579 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
580 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
581 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
582 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
583 return NULL;
585 /* The type conversion could be promotion, demotion,
586 or just signed -> unsigned. */
587 if (type_conversion_p (plus_oprnd0, last_stmt, false,
588 &half_type, &def_stmt, &promotion))
589 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
590 else
591 half_type = sum_type;
594 /* So far so good. Since last_stmt was detected as a (summation) reduction,
595 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
596 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
597 Then check that plus_oprnd0 is defined by an abs_expr. */
599 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
600 return NULL;
602 tree abs_type = half_type;
603 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
605 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
606 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
607 return NULL;
609 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
610 inside the loop (in case we are analyzing an outer-loop). */
611 if (!is_gimple_assign (abs_stmt))
612 return NULL;
614 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
615 gcc_assert (abs_stmt_vinfo);
616 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
617 return NULL;
618 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
619 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR)
620 return NULL;
622 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
623 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
624 return NULL;
625 if (TYPE_UNSIGNED (abs_type))
626 return NULL;
628 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
630 if (TREE_CODE (abs_oprnd) != SSA_NAME)
631 return NULL;
633 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
635 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
636 if (!gimple_bb (diff_stmt)
637 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
638 return NULL;
640 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
641 inside the loop (in case we are analyzing an outer-loop). */
642 if (!is_gimple_assign (diff_stmt))
643 return NULL;
645 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
646 gcc_assert (diff_stmt_vinfo);
647 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
648 return NULL;
649 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
650 return NULL;
652 tree half_type0, half_type1;
653 gimple *def_stmt;
655 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
656 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
658 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
659 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
660 return NULL;
661 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
662 &half_type0, &def_stmt, &promotion)
663 || !promotion)
664 return NULL;
665 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
667 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
668 &half_type1, &def_stmt, &promotion)
669 || !promotion)
670 return NULL;
671 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
673 if (!types_compatible_p (half_type0, half_type1))
674 return NULL;
675 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
676 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
677 return NULL;
679 *type_in = TREE_TYPE (sad_oprnd0);
680 *type_out = sum_type;
682 /* Pattern detected. Create a stmt to be used to replace the pattern: */
683 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
684 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
685 sad_oprnd1, plus_oprnd1);
687 if (dump_enabled_p ())
689 dump_printf_loc (MSG_NOTE, vect_location,
690 "vect_recog_sad_pattern: detected: ");
691 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
694 return pattern_stmt;
698 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
699 and LSHIFT_EXPR.
701 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
702 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
704 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
705 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
706 that satisfies the above restrictions, we can perform a widening operation
707 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
708 with a_it = (interm_type) a_t; Store such an operation in *WSTMT. */
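/* For illustration, a hypothetical case where the intermediate type is used
   (a sketch only; names are made up, not from the GCC testsuite):

     unsigned char a[256];
     int out[256];
     for (int i = 0; i < 256; i++)
       out[i] = a[i] * 300;

   Here TYPE is int, four times wider than the unsigned char HALF_TYPE, and
   the constant 300 does not fit unsigned char but does fit the 16-bit
   intermediate type, so an a_it = (interm_type) a_t conversion is stored in
   *WSTMT and the widening multiplication is done from the intermediate
   type.  */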
710 static bool
711 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
712 tree const_oprnd, tree *oprnd,
713 gimple **wstmt, tree type,
714 tree *half_type, gimple *def_stmt)
716 tree new_type, new_oprnd;
718 if (code != MULT_EXPR && code != LSHIFT_EXPR)
719 return false;
721 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
722 || (code == LSHIFT_EXPR
723 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
724 != 1))
725 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
727 /* CONST_OPRND is a constant of HALF_TYPE. */
728 *oprnd = gimple_assign_rhs1 (def_stmt);
729 return true;
732 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
733 return false;
735 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
736 return false;
738 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
739 a type 2 times bigger than HALF_TYPE. */
740 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
741 TYPE_UNSIGNED (type));
742 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
743 || (code == LSHIFT_EXPR
744 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
745 return false;
747 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
748 *oprnd = gimple_assign_rhs1 (def_stmt);
749 new_oprnd = make_ssa_name (new_type);
750 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
751 *oprnd = new_oprnd;
753 *half_type = new_type;
754 return true;
758 /* Function vect_recog_widen_mult_pattern
760 Try to find the following pattern:
762 type1 a_t;
763 type2 b_t;
764 TYPE a_T, b_T, prod_T;
766 S1 a_t = ;
767 S2 b_t = ;
768 S3 a_T = (TYPE) a_t;
769 S4 b_T = (TYPE) b_t;
770 S5 prod_T = a_T * b_T;
772 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
774 Also detect unsigned cases:
776 unsigned type1 a_t;
777 unsigned type2 b_t;
778 unsigned TYPE u_prod_T;
779 TYPE a_T, b_T, prod_T;
781 S1 a_t = ;
782 S2 b_t = ;
783 S3 a_T = (TYPE) a_t;
784 S4 b_T = (TYPE) b_t;
785 S5 prod_T = a_T * b_T;
786 S6 u_prod_T = (unsigned TYPE) prod_T;
788 and multiplication by constants:
790 type a_t;
791 TYPE a_T, prod_T;
793 S1 a_t = ;
794 S3 a_T = (TYPE) a_t;
795 S5 prod_T = a_T * CONST;
797 A special case of multiplication by constants is when 'TYPE' is 4 times
798 bigger than 'type', but CONST fits an intermediate type 2 times smaller
799 than 'TYPE'. In that case we create an additional pattern stmt for S3
800 to create a variable of the intermediate type, and perform widen-mult
801 on the intermediate type as well:
803 type a_t;
804 interm_type a_it;
805 TYPE a_T, prod_T, prod_T';
807 S1 a_t = ;
808 S3 a_T = (TYPE) a_t;
809 '--> a_it = (interm_type) a_t;
810 S5 prod_T = a_T * CONST;
811 '--> prod_T' = a_it w* CONST;
813 Input/Output:
815 * STMTS: Contains a stmt from which the pattern search begins. In the
816 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
817 is detected. In case of unsigned widen-mult, the original stmt (S5) is
818 replaced with S6 in STMTS. In case of multiplication by a constant
819 of an intermediate type (the last case above), STMTS also contains S3
820 (inserted before S5).
822 Output:
824 * TYPE_IN: The type of the input arguments to the pattern.
826 * TYPE_OUT: The type of the output of this pattern.
828 * Return value: A new stmt that will be used to replace the sequence of
829 stmts that constitute the pattern. In this case it will be:
830 WIDEN_MULT <a_t, b_t>
831 If the result of WIDEN_MULT needs to be converted to a larger type, the
832 returned stmt will be this type conversion stmt.
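/* For illustration, one plausible source form of the basic case (a
   hypothetical sketch, not from the GCC testsuite; names are made up):

     short a[256], b[256];
     int c[256];
     for (int i = 0; i < 256; i++)
       c[i] = a[i] * b[i];

   The usual integer promotions supply S3/S4 and the product is S5, so the
   multiplication can be replaced by WIDEN_MULT <a[i], b[i]> taking vector
   short inputs and producing vector int results.  */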
835 static gimple *
836 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
837 tree *type_in, tree *type_out)
839 gimple *last_stmt = stmts->pop ();
840 gimple *def_stmt0, *def_stmt1;
841 tree oprnd0, oprnd1;
842 tree type, half_type0, half_type1;
843 gimple *new_stmt = NULL, *pattern_stmt = NULL;
844 tree vectype, vecitype;
845 tree var;
846 enum tree_code dummy_code;
847 int dummy_int;
848 vec<tree> dummy_vec;
849 bool op1_ok;
850 bool promotion;
852 if (!is_gimple_assign (last_stmt))
853 return NULL;
855 type = gimple_expr_type (last_stmt);
857 /* Starting from LAST_STMT, follow the defs of its uses in search
858 of the above pattern. */
860 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
861 return NULL;
863 oprnd0 = gimple_assign_rhs1 (last_stmt);
864 oprnd1 = gimple_assign_rhs2 (last_stmt);
865 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
866 || !types_compatible_p (TREE_TYPE (oprnd1), type))
867 return NULL;
869 /* Check argument 0. */
870 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
871 &promotion)
872 || !promotion)
873 return NULL;
874 /* Check argument 1. */
875 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
876 &def_stmt1, &promotion);
878 if (op1_ok && promotion)
880 oprnd0 = gimple_assign_rhs1 (def_stmt0);
881 oprnd1 = gimple_assign_rhs1 (def_stmt1);
883 else
885 if (TREE_CODE (oprnd1) == INTEGER_CST
886 && TREE_CODE (half_type0) == INTEGER_TYPE
887 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
888 &oprnd0, &new_stmt, type,
889 &half_type0, def_stmt0))
891 half_type1 = half_type0;
892 oprnd1 = fold_convert (half_type1, oprnd1);
894 else
895 return NULL;
898 /* If the two arguments have different sizes, convert the one with
899 the smaller type into the larger type. */
900 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
902 /* If we already used up the single-stmt slot give up. */
903 if (new_stmt)
904 return NULL;
906 tree* oprnd = NULL;
907 gimple *def_stmt = NULL;
909 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
911 def_stmt = def_stmt0;
912 half_type0 = half_type1;
913 oprnd = &oprnd0;
915 else
917 def_stmt = def_stmt1;
918 half_type1 = half_type0;
919 oprnd = &oprnd1;
922 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
923 tree new_oprnd = make_ssa_name (half_type0);
924 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
925 *oprnd = new_oprnd;
928 /* Handle unsigned case. Look for
929 S6 u_prod_T = (unsigned TYPE) prod_T;
930 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
931 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
933 gimple *use_stmt;
934 tree use_lhs;
935 tree use_type;
937 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
938 return NULL;
940 use_stmt = vect_single_imm_use (last_stmt);
941 if (!use_stmt || !is_gimple_assign (use_stmt)
942 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
943 return NULL;
945 use_lhs = gimple_assign_lhs (use_stmt);
946 use_type = TREE_TYPE (use_lhs);
947 if (!INTEGRAL_TYPE_P (use_type)
948 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
949 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
950 return NULL;
952 type = use_type;
953 last_stmt = use_stmt;
956 if (!types_compatible_p (half_type0, half_type1))
957 return NULL;
959 /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
960 to get an intermediate result of type ITYPE. In this case we need
961 to build a statement to convert this intermediate result to type TYPE. */
962 tree itype = type;
963 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
964 itype = build_nonstandard_integer_type
965 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
966 TYPE_UNSIGNED (type));
968 /* Pattern detected. */
969 if (dump_enabled_p ())
970 dump_printf_loc (MSG_NOTE, vect_location,
971 "vect_recog_widen_mult_pattern: detected:\n");
973 /* Check target support */
974 vectype = get_vectype_for_scalar_type (half_type0);
975 vecitype = get_vectype_for_scalar_type (itype);
976 if (!vectype
977 || !vecitype
978 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
979 vecitype, vectype,
980 &dummy_code, &dummy_code,
981 &dummy_int, &dummy_vec))
982 return NULL;
984 *type_in = vectype;
985 *type_out = get_vectype_for_scalar_type (type);
987 /* Pattern supported. Create a stmt to be used to replace the pattern: */
988 var = vect_recog_temp_ssa_var (itype, NULL);
989 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
991 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
992 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
994 /* If the original two operands have different sizes, we may need to convert
995 the smaller one into the larger type. If this is the case, at this point
996 the new stmt is already built. */
997 if (new_stmt)
999 append_pattern_def_seq (stmt_vinfo, new_stmt);
1000 stmt_vec_info new_stmt_info
1001 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
1002 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1003 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1006 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
1007 the result of the widen-mult operation into type TYPE. */
1008 if (itype != type)
1010 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1011 stmt_vec_info pattern_stmt_info
1012 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
1013 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1014 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1015 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1016 NOP_EXPR,
1017 gimple_assign_lhs (pattern_stmt));
1020 if (dump_enabled_p ())
1021 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1023 stmts->safe_push (last_stmt);
1024 return pattern_stmt;
1028 /* Function vect_recog_pow_pattern
1030 Try to find the following pattern:
1032 x = POW (y, N);
1034 with POW being one of pow, powf, powi, powif and N being
1035 either 2 or 0.5.
1037 Input:
1039 * LAST_STMT: A stmt from which the pattern search begins.
1041 Output:
1043 * TYPE_IN: The type of the input arguments to the pattern.
1045 * TYPE_OUT: The type of the output of this pattern.
1047 * Return value: A new stmt that will be used to replace the sequence of
1048 stmts that constitute the pattern. In this case it will be:
1049 x = x * x
1051 x = sqrt (x)
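/* For illustration, hypothetical uses that this function rewrites (sketches
   only; names are made up; pow from <math.h> is assumed):

     out[i] = pow (in[i], 2.0);   is replaced by   out[i] = in[i] * in[i];
     out[i] = pow (in[i], 0.5);   is replaced by a call to IFN_SQRT, provided
                                  the target supports a vector sqrt.  */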
1054 static gimple *
1055 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1056 tree *type_out)
1058 gimple *last_stmt = (*stmts)[0];
1059 tree base, exp;
1060 gimple *stmt;
1061 tree var;
1063 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1064 return NULL;
1066 switch (gimple_call_combined_fn (last_stmt))
1068 CASE_CFN_POW:
1069 CASE_CFN_POWI:
1070 break;
1072 default:
1073 return NULL;
1076 base = gimple_call_arg (last_stmt, 0);
1077 exp = gimple_call_arg (last_stmt, 1);
1078 if (TREE_CODE (exp) != REAL_CST
1079 && TREE_CODE (exp) != INTEGER_CST)
1081 if (flag_unsafe_math_optimizations
1082 && TREE_CODE (base) == REAL_CST
1083 && !gimple_call_internal_p (last_stmt))
1085 combined_fn log_cfn;
1086 built_in_function exp_bfn;
1087 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1089 case BUILT_IN_POW:
1090 log_cfn = CFN_BUILT_IN_LOG;
1091 exp_bfn = BUILT_IN_EXP;
1092 break;
1093 case BUILT_IN_POWF:
1094 log_cfn = CFN_BUILT_IN_LOGF;
1095 exp_bfn = BUILT_IN_EXPF;
1096 break;
1097 case BUILT_IN_POWL:
1098 log_cfn = CFN_BUILT_IN_LOGL;
1099 exp_bfn = BUILT_IN_EXPL;
1100 break;
1101 default:
1102 return NULL;
1104 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1105 tree exp_decl = builtin_decl_implicit (exp_bfn);
1106 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1107 does that, but if C is a power of 2, we want to use
1108 exp2 (log2 (C) * x) in the non-vectorized version, but for
1109 vectorization we don't have vectorized exp2. */
1110 if (logc
1111 && TREE_CODE (logc) == REAL_CST
1112 && exp_decl
1113 && lookup_attribute ("omp declare simd",
1114 DECL_ATTRIBUTES (exp_decl)))
1116 cgraph_node *node = cgraph_node::get_create (exp_decl);
1117 if (node->simd_clones == NULL)
1119 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1120 || node->definition)
1121 return NULL;
1122 expand_simd_clones (node);
1123 if (node->simd_clones == NULL)
1124 return NULL;
1126 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1127 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1128 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1129 new_pattern_def_seq (stmt_vinfo, g);
1130 *type_in = TREE_TYPE (base);
1131 *type_out = NULL_TREE;
1132 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1133 g = gimple_build_call (exp_decl, 1, def);
1134 gimple_call_set_lhs (g, res);
1135 return g;
1139 return NULL;
1142 /* We now have a pow or powi builtin function call with a constant
1143 exponent. */
1145 *type_out = NULL_TREE;
1147 /* Catch squaring. */
1148 if ((tree_fits_shwi_p (exp)
1149 && tree_to_shwi (exp) == 2)
1150 || (TREE_CODE (exp) == REAL_CST
1151 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1153 *type_in = TREE_TYPE (base);
1155 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1156 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1157 return stmt;
1160 /* Catch square root. */
1161 if (TREE_CODE (exp) == REAL_CST
1162 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1164 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1165 if (*type_in
1166 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1167 OPTIMIZE_FOR_SPEED))
1169 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1170 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1171 gimple_call_set_lhs (stmt, var);
1172 gimple_call_set_nothrow (stmt, true);
1173 return stmt;
1177 return NULL;
1181 /* Function vect_recog_widen_sum_pattern
1183 Try to find the following pattern:
1185 type x_t;
1186 TYPE x_T, sum = init;
1187 loop:
1188 sum_0 = phi <init, sum_1>
1189 S1 x_t = *p;
1190 S2 x_T = (TYPE) x_t;
1191 S3 sum_1 = x_T + sum_0;
1193 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1194 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1195 a special case of a reduction computation.
1197 Input:
1199 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1200 when this function is called with S3, the pattern {S2,S3} will be detected.
1202 Output:
1204 * TYPE_IN: The type of the input arguments to the pattern.
1206 * TYPE_OUT: The type of the output of this pattern.
1208 * Return value: A new stmt that will be used to replace the sequence of
1209 stmts that constitute the pattern. In this case it will be:
1210 WIDEN_SUM <x_t, sum_0>
1212 Note: The widening-sum idiom is a widening reduction pattern that is
1213 vectorized without preserving all the intermediate results. It
1214 produces only N/2 (widened) results (by summing up pairs of
1215 intermediate results) rather than all N results. Therefore, we
1216 cannot allow this pattern when we want to get all the results and in
1217 the correct order (as is the case when this computation is in an
1218 inner-loop nested in an outer-loop that is being vectorized). */
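/* For illustration, one plausible source-level loop of the kind this pattern
   targets (a hypothetical sketch, not from the GCC testsuite; names are
   made up):

     unsigned short x[256];
     unsigned int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i];

   The promotion of x[i] to unsigned int is S2 and the accumulation is S3,
   so the pair is replaced by WIDEN_SUM <x[i], sum_0>.  */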
1220 static gimple *
1221 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1222 tree *type_out)
1224 gimple *stmt, *last_stmt = (*stmts)[0];
1225 tree oprnd0, oprnd1;
1226 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1227 tree type, half_type;
1228 gimple *pattern_stmt;
1229 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1230 struct loop *loop;
1231 tree var;
1232 bool promotion;
1234 if (!loop_info)
1235 return NULL;
1237 loop = LOOP_VINFO_LOOP (loop_info);
1239 /* We don't allow changing the order of the computation in the inner-loop
1240 when doing outer-loop vectorization. */
1241 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1242 return NULL;
1244 if (!is_gimple_assign (last_stmt))
1245 return NULL;
1247 type = gimple_expr_type (last_stmt);
1249 /* Look for the following pattern
1250 DX = (TYPE) X;
1251 sum_1 = DX + sum_0;
1252 In which DX is at least double the size of X, and sum_1 has been
1253 recognized as a reduction variable.
1256 /* Starting from LAST_STMT, follow the defs of its uses in search
1257 of the above pattern. */
1259 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1260 return NULL;
1262 if (!vect_reassociating_reduction_p (stmt_vinfo))
1263 return NULL;
1265 oprnd0 = gimple_assign_rhs1 (last_stmt);
1266 oprnd1 = gimple_assign_rhs2 (last_stmt);
1267 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1268 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1269 return NULL;
1271 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1272 we know that oprnd1 is the reduction variable (defined by a loop-header
1273 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1274 Left to check that oprnd0 is defined by a cast from type 'type' to type
1275 'TYPE'. */
1277 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1278 &promotion)
1279 || !promotion)
1280 return NULL;
1282 oprnd0 = gimple_assign_rhs1 (stmt);
1283 *type_in = half_type;
1284 *type_out = type;
1286 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1287 var = vect_recog_temp_ssa_var (type, NULL);
1288 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1290 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "vect_recog_widen_sum_pattern: detected: ");
1294 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1297 return pattern_stmt;
1301 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1303 Input:
1304 STMT - a statement to check.
1305 DEF - we support operations with two operands, one of which is constant.
1306 The other operand can be defined by a demotion operation, or by a
1307 previous statement in a sequence of over-promoted operations. In the
1308 latter case DEF is used to replace that operand. (It is defined by a
1309 pattern statement we created for the previous statement in the
1310 sequence).
1312 Input/output:
1313 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1314 NULL, it's the type of DEF.
1315 STMTS - additional pattern statements. If a pattern statement (type
1316 conversion) is created in this function, its original statement is
1317 added to STMTS.
1319 Output:
1320 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1321 operands to use in the new pattern statement for STMT (will be created
1322 in vect_recog_over_widening_pattern ()).
1323 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1324 statements for STMT: the first one is a type promotion and the second
1325 one is the operation itself. We return the type promotion statement
1326 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1327 the second pattern statement. */
1329 static bool
1330 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1331 tree *op0, tree *op1, gimple **new_def_stmt,
1332 vec<gimple *> *stmts)
1334 enum tree_code code;
1335 tree const_oprnd, oprnd;
1336 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1337 gimple *def_stmt, *new_stmt;
1338 bool first = false;
1339 bool promotion;
1341 *op0 = NULL_TREE;
1342 *op1 = NULL_TREE;
1343 *new_def_stmt = NULL;
1345 if (!is_gimple_assign (stmt))
1346 return false;
1348 code = gimple_assign_rhs_code (stmt);
1349 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1350 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1351 return false;
1353 oprnd = gimple_assign_rhs1 (stmt);
1354 const_oprnd = gimple_assign_rhs2 (stmt);
1355 type = gimple_expr_type (stmt);
1357 if (TREE_CODE (oprnd) != SSA_NAME
1358 || TREE_CODE (const_oprnd) != INTEGER_CST)
1359 return false;
1361 /* If oprnd has other uses besides that in stmt we cannot mark it
1362 as being part of a pattern only. */
1363 if (!has_single_use (oprnd))
1364 return false;
1366 /* If we are in the middle of a sequence, we use DEF from a previous
1367 statement. Otherwise, OPRND has to be a result of type promotion. */
1368 if (*new_type)
1370 half_type = *new_type;
1371 oprnd = def;
1373 else
1375 first = true;
1376 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1377 &promotion)
1378 || !promotion
1379 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1380 return false;
1383 /* Can we perform the operation on a smaller type? */
1384 switch (code)
1386 case BIT_IOR_EXPR:
1387 case BIT_XOR_EXPR:
1388 case BIT_AND_EXPR:
1389 if (!int_fits_type_p (const_oprnd, half_type))
1391 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1392 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1393 return false;
1395 interm_type = build_nonstandard_integer_type (
1396 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1397 if (!int_fits_type_p (const_oprnd, interm_type))
1398 return false;
1401 break;
1403 case LSHIFT_EXPR:
1404 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1405 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1406 return false;
1408 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1409 (e.g., if the original value was char, the shift amount is at most 8
1410 if we want to use short). */
1411 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1412 return false;
1414 interm_type = build_nonstandard_integer_type (
1415 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1417 if (!vect_supportable_shift (code, interm_type))
1418 return false;
1420 break;
1422 case RSHIFT_EXPR:
1423 if (vect_supportable_shift (code, half_type))
1424 break;
1426 /* Try intermediate type - HALF_TYPE is not supported. */
1427 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1428 return false;
1430 interm_type = build_nonstandard_integer_type (
1431 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1433 if (!vect_supportable_shift (code, interm_type))
1434 return false;
1436 break;
1438 default:
1439 gcc_unreachable ();
1442 /* There are four possible cases:
1443 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1444 the first statement in the sequence)
1445 a. The original, HALF_TYPE, is not enough - we replace the promotion
1446 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1447 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1448 promotion.
1449 2. OPRND is defined by a pattern statement we created.
1450 a. Its type is not sufficient for the operation, we create a new stmt:
1451 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1452 this statement in NEW_DEF_STMT, and it is later put in
1453 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1454 b. OPRND is good to use in the new statement. */
1455 if (first)
1457 if (interm_type)
1459 /* Replace the original type conversion HALF_TYPE->TYPE with
1460 HALF_TYPE->INTERM_TYPE. */
1461 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1463 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1464 /* Check if the already created pattern stmt is what we need. */
1465 if (!is_gimple_assign (new_stmt)
1466 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1467 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1468 return false;
1470 stmts->safe_push (def_stmt);
1471 oprnd = gimple_assign_lhs (new_stmt);
1473 else
1475 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1476 oprnd = gimple_assign_rhs1 (def_stmt);
1477 new_oprnd = make_ssa_name (interm_type);
1478 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1479 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1480 stmts->safe_push (def_stmt);
1481 oprnd = new_oprnd;
1484 else
1486 /* Retrieve the operand before the type promotion. */
1487 oprnd = gimple_assign_rhs1 (def_stmt);
1490 else
1492 if (interm_type)
1494 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1495 new_oprnd = make_ssa_name (interm_type);
1496 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1497 oprnd = new_oprnd;
1498 *new_def_stmt = new_stmt;
1501 /* Otherwise, OPRND is already set. */
1504 if (interm_type)
1505 *new_type = interm_type;
1506 else
1507 *new_type = half_type;
1509 *op0 = oprnd;
1510 *op1 = fold_convert (*new_type, const_oprnd);
1512 return true;
1516 /* Try to find a statement or a sequence of statements that can be performed
1517 on a smaller type:
1519 type x_t;
1520 TYPE x_T, res0_T, res1_T;
1521 loop:
1522 S1 x_t = *p;
1523 S2 x_T = (TYPE) x_t;
1524 S3 res0_T = op (x_T, C0);
1525 S4 res1_T = op (res0_T, C1);
1526 S5 ... = () res1_T; - type demotion
1528 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1529 constants.
1530 Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1531 be 'type' or some intermediate type. For now, we expect S5 to be a type
1532 demotion operation. We also check that S3 and S4 have only one use. */
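/* For illustration, a plausible C loop with this shape (a hypothetical
   sketch, not from the GCC testsuite; names are made up, and the narrow
   right shift is assumed to be supported by the target):

     unsigned char in[256], out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (in[i] >> 2) & 0x1f;

   The usual promotion widens in[i] to int (S2), the shift and the bitwise
   AND are S3 and S4, and the store back to unsigned char is the demotion
   S5, so the whole computation can instead be carried out on unsigned char
   vectors.  */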
1534 static gimple *
1535 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1536 tree *type_in, tree *type_out)
1538 gimple *stmt = stmts->pop ();
1539 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1540 *use_stmt = NULL;
1541 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1542 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1543 bool first;
1544 tree type = NULL;
1546 first = true;
1547 while (1)
1549 if (!vinfo_for_stmt (stmt)
1550 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1551 return NULL;
1553 new_def_stmt = NULL;
1554 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1555 &op0, &op1, &new_def_stmt,
1556 stmts))
1558 if (first)
1559 return NULL;
1560 else
1561 break;
1564 /* STMT can be performed on a smaller type. Check its uses. */
1565 use_stmt = vect_single_imm_use (stmt);
1566 if (!use_stmt || !is_gimple_assign (use_stmt))
1567 return NULL;
1569 /* Create pattern statement for STMT. */
1570 vectype = get_vectype_for_scalar_type (new_type);
1571 if (!vectype)
1572 return NULL;
1574 /* We want to collect all the statements for which we create pattern
1575 statements, except for the case when the last statement in the
1576 sequence doesn't have a corresponding pattern statement. In such
1577 case we associate the last pattern statement with the last statement
1578 in the sequence. Therefore, we only add the original statement to
1579 the list if we know that it is not the last. */
1580 if (prev_stmt)
1581 stmts->safe_push (prev_stmt);
1583 var = vect_recog_temp_ssa_var (new_type, NULL);
1584 pattern_stmt
1585 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1586 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1587 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1589 if (dump_enabled_p ())
1591 dump_printf_loc (MSG_NOTE, vect_location,
1592 "created pattern stmt: ");
1593 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1596 type = gimple_expr_type (stmt);
1597 prev_stmt = stmt;
1598 stmt = use_stmt;
1600 first = false;
1603 /* We got a sequence. We expect it to end with a type demotion operation.
1604 Otherwise, we quit (for now). There are three possible cases: the
1605 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1606 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1607 NEW_TYPE differs (we create a new conversion statement). */
1608 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1610 use_lhs = gimple_assign_lhs (use_stmt);
1611 use_type = TREE_TYPE (use_lhs);
1612 /* Support only type demotion or signedness change. */
1613 if (!INTEGRAL_TYPE_P (use_type)
1614 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1615 return NULL;
1617 /* Check that NEW_TYPE is not bigger than the conversion result. */
1618 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1619 return NULL;
1621 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1622 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1624 /* Create NEW_TYPE->USE_TYPE conversion. */
1625 new_oprnd = make_ssa_name (use_type);
1626 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1627 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1629 *type_in = get_vectype_for_scalar_type (new_type);
1630 *type_out = get_vectype_for_scalar_type (use_type);
1632 /* We created a pattern statement for the last statement in the
1633 sequence, so we don't need to associate it with the pattern
1634 statement created for PREV_STMT. Therefore, we add PREV_STMT
1635 to the list in order to mark it later in vect_pattern_recog_1. */
1636 if (prev_stmt)
1637 stmts->safe_push (prev_stmt);
1639 else
1641 if (prev_stmt)
1642 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1643 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1645 *type_in = vectype;
1646 *type_out = NULL_TREE;
1649 stmts->safe_push (use_stmt);
1651 else
1652 /* TODO: support general case, create a conversion to the correct type. */
1653 return NULL;
1655 /* Pattern detected. */
1656 if (dump_enabled_p ())
1658 dump_printf_loc (MSG_NOTE, vect_location,
1659 "vect_recog_over_widening_pattern: detected: ");
1660 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1663 return pattern_stmt;
1666 /* Detect widening shift pattern:
1668 type a_t;
1669 TYPE a_T, res_T;
1671 S1 a_t = ;
1672 S2 a_T = (TYPE) a_t;
1673 S3 res_T = a_T << CONST;
1675 where type 'TYPE' is at least double the size of type 'type'.
1677 Also detect cases where the shift result is immediately converted
1678 to another type 'result_type' that is no larger in size than 'TYPE'.
1679 In those cases we perform a widen-shift that directly results in
1680 'result_type', to avoid a possible over-widening situation:
1682 type a_t;
1683 TYPE a_T, res_T;
1684 result_type res_result;
1686 S1 a_t = ;
1687 S2 a_T = (TYPE) a_t;
1688 S3 res_T = a_T << CONST;
1689 S4 res_result = (result_type) res_T;
1690 '--> res_result' = a_t w<< CONST;
1692 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1693 create an additional pattern stmt for S2 to create a variable of an
1694 intermediate type, and perform widen-shift on the intermediate type:
1696 type a_t;
1697 interm_type a_it;
1698 TYPE a_T, res_T, res_T';
1700 S1 a_t = ;
1701 S2 a_T = (TYPE) a_t;
1702 '--> a_it = (interm_type) a_t;
1703 S3 res_T = a_T << CONST;
1704 '--> res_T' = a_it w<< CONST;
1706 Input/Output:
1708 * STMTS: Contains a stmt from which the pattern search begins.
1709 In case the shift result is converted to another type (S4 above), the
1710 original stmt (S3) is replaced with S4 in STMTS. When an intermediate type
1711 is used and a pattern statement is created for S2, we also put S2 here (before S3).
1713 Output:
1715 * TYPE_IN: The type of the input arguments to the pattern.
1717 * TYPE_OUT: The type of the output of this pattern.
1719 * Return value: A new stmt that will be used to replace the sequence of
1720 stmts that constitute the pattern. In this case it will be:
1721 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
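/* For illustration, one plausible source-level loop of the kind this pattern
   targets (a hypothetical sketch, not from the GCC testsuite; names are
   made up):

     unsigned char in[256];
     unsigned short out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (unsigned short) (in[i] << 7);

   The promotion of in[i] for the shift is S2, the shift itself is S3 and
   the conversion for the unsigned short store is S4, so the whole thing can
   be done as a widening shift from unsigned char to a 16-bit result.  */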
1723 static gimple *
1724 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1725 tree *type_in, tree *type_out)
1727 gimple *last_stmt = stmts->pop ();
1728 gimple *def_stmt0;
1729 tree oprnd0, oprnd1;
1730 tree type, half_type0;
1731 gimple *pattern_stmt;
1732 tree vectype, vectype_out = NULL_TREE;
1733 tree var;
1734 enum tree_code dummy_code;
1735 int dummy_int;
1736 vec<tree> dummy_vec;
1737 gimple *use_stmt;
1738 bool promotion;
1740 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1741 return NULL;
1743 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1744 return NULL;
1746 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1747 return NULL;
1749 oprnd0 = gimple_assign_rhs1 (last_stmt);
1750 oprnd1 = gimple_assign_rhs2 (last_stmt);
1751 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1752 return NULL;
1754 /* Check operand 0: it has to be defined by a type promotion. */
1755 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1756 &promotion)
1757 || !promotion)
1758 return NULL;
1760 /* Check operand 1: has to be positive. We check that it fits the type
1761 in vect_handle_widen_op_by_const (). */
1762 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1763 return NULL;
1765 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1766 type = gimple_expr_type (last_stmt);
1768 /* Check for subsequent conversion to another type. */
1769 use_stmt = vect_single_imm_use (last_stmt);
1770 if (use_stmt && is_gimple_assign (use_stmt)
1771 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1772 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1774 tree use_lhs = gimple_assign_lhs (use_stmt);
1775 tree use_type = TREE_TYPE (use_lhs);
1777 if (INTEGRAL_TYPE_P (use_type)
1778 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1780 last_stmt = use_stmt;
1781 type = use_type;
1785 /* Check if this is a widening operation. */
1786 gimple *wstmt = NULL;
1787 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1788 &oprnd0, &wstmt,
1789 type, &half_type0, def_stmt0))
1790 return NULL;
1792 /* Pattern detected. */
1793 if (dump_enabled_p ())
1794 dump_printf_loc (MSG_NOTE, vect_location,
1795 "vect_recog_widen_shift_pattern: detected:\n");
1797 /* Check target support. */
1798 vectype = get_vectype_for_scalar_type (half_type0);
1799 vectype_out = get_vectype_for_scalar_type (type);
1801 if (!vectype
1802 || !vectype_out
1803 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1804 vectype_out, vectype,
1805 &dummy_code, &dummy_code,
1806 &dummy_int, &dummy_vec))
1807 return NULL;
1809 *type_in = vectype;
1810 *type_out = vectype_out;
1812 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1813 var = vect_recog_temp_ssa_var (type, NULL);
1814 pattern_stmt
1815 = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1816 if (wstmt)
1818 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1819 new_pattern_def_seq (stmt_vinfo, wstmt);
1820 stmt_vec_info new_stmt_info
1821 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1822 set_vinfo_for_stmt (wstmt, new_stmt_info);
1823 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1826 if (dump_enabled_p ())
1827 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1829 stmts->safe_push (last_stmt);
1830 return pattern_stmt;
1833 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1835 type a_t, b_t, c_t;
1837 S0 a_t = b_t r<< c_t;
1839 Input/Output:
1841 * STMTS: Contains a stmt from which the pattern search begins,
1842 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1843 with a sequence:
1845 S1 d_t = -c_t;
1846 S2 e_t = d_t & (B - 1);
1847 S3 f_t = b_t << c_t;
1848 S4 g_t = b_t >> e_t;
1849 S0 a_t = f_t | g_t;
1851 where B is element bitsize of type.
1853 Output:
1855 * TYPE_IN: The type of the input arguments to the pattern.
1857 * TYPE_OUT: The type of the output of this pattern.
1859 * Return value: A new stmt that will be used to replace the rotate
1860 S0 stmt. */
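/* Illustrative sketch, not part of the original file (the function name is
   hypothetical): source using a rotate idiom.  The expression below is
   normally folded into an LROTATE_EXPR, i.e. the S0 above with B = 32 for a
   32-bit 'unsigned int'; when the target lacks a vector rotate but has
   vector shifts, this recognizer expands it into the S1-S0 sequence:

     void
     rotate_example (unsigned int *restrict a, const unsigned int *restrict b,
                     unsigned int c, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = (b[i] << c) | (b[i] >> ((-c) & 31));
     }
*/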
1862 static gimple *
1863 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1865 gimple *last_stmt = stmts->pop ();
1866 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1867 gimple *pattern_stmt, *def_stmt;
1868 enum tree_code rhs_code;
1869 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1870 vec_info *vinfo = stmt_vinfo->vinfo;
1871 enum vect_def_type dt;
1872 optab optab1, optab2;
1873 edge ext_def = NULL;
1875 if (!is_gimple_assign (last_stmt))
1876 return NULL;
1878 rhs_code = gimple_assign_rhs_code (last_stmt);
1879 switch (rhs_code)
1881 case LROTATE_EXPR:
1882 case RROTATE_EXPR:
1883 break;
1884 default:
1885 return NULL;
1888 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1889 return NULL;
1891 lhs = gimple_assign_lhs (last_stmt);
1892 oprnd0 = gimple_assign_rhs1 (last_stmt);
1893 type = TREE_TYPE (oprnd0);
1894 oprnd1 = gimple_assign_rhs2 (last_stmt);
1895 if (TREE_CODE (oprnd0) != SSA_NAME
1896 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1897 || !INTEGRAL_TYPE_P (type)
1898 || !TYPE_UNSIGNED (type))
1899 return NULL;
1901 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1902 return NULL;
1904 if (dt != vect_internal_def
1905 && dt != vect_constant_def
1906 && dt != vect_external_def)
1907 return NULL;
1909 vectype = get_vectype_for_scalar_type (type);
1910 if (vectype == NULL_TREE)
1911 return NULL;
1913 /* If vector/vector or vector/scalar rotate is supported by the target,
1914 don't do anything here. */
1915 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1916 if (optab1
1917 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1918 return NULL;
1920 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1922 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1923 if (optab2
1924 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1925 return NULL;
1928 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1929 don't do anything here either. */
1930 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1931 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1932 if (!optab1
1933 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1934 || !optab2
1935 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1937 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1938 return NULL;
1939 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1940 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1941 if (!optab1
1942 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1943 || !optab2
1944 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1945 return NULL;
1948 *type_in = vectype;
1949 *type_out = vectype;
1950 if (*type_in == NULL_TREE)
1951 return NULL;
1953 if (dt == vect_external_def
1954 && TREE_CODE (oprnd1) == SSA_NAME
1955 && is_a <loop_vec_info> (vinfo))
1957 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1958 ext_def = loop_preheader_edge (loop);
1959 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1961 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1962 if (bb == NULL
1963 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1964 ext_def = NULL;
1968 def = NULL_TREE;
1969 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1970 if (TREE_CODE (oprnd1) == INTEGER_CST
1971 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1972 def = oprnd1;
1973 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1975 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1976 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1977 && TYPE_PRECISION (TREE_TYPE (rhs1))
1978 == TYPE_PRECISION (type))
1979 def = rhs1;
1982 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1983 if (def == NULL_TREE)
1985 def = vect_recog_temp_ssa_var (type, NULL);
1986 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1987 if (ext_def)
1989 basic_block new_bb
1990 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1991 gcc_assert (!new_bb);
1993 else
1994 append_pattern_def_seq (stmt_vinfo, def_stmt);
1996 stype = TREE_TYPE (def);
1997 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1999 if (TREE_CODE (def) == INTEGER_CST)
2001 if (!tree_fits_uhwi_p (def)
2002 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2003 || integer_zerop (def))
2004 return NULL;
2005 def2 = build_int_cst (stype,
2006 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2008 else
2010 tree vecstype = get_vectype_for_scalar_type (stype);
2011 stmt_vec_info def_stmt_vinfo;
2013 if (vecstype == NULL_TREE)
2014 return NULL;
2015 def2 = vect_recog_temp_ssa_var (stype, NULL);
2016 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2017 if (ext_def)
2019 basic_block new_bb
2020 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2021 gcc_assert (!new_bb);
2023 else
2025 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2026 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2027 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2028 append_pattern_def_seq (stmt_vinfo, def_stmt);
2031 def2 = vect_recog_temp_ssa_var (stype, NULL);
2032 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2033 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2034 gimple_assign_lhs (def_stmt), mask);
2035 if (ext_def)
2037 basic_block new_bb
2038 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2039 gcc_assert (!new_bb);
2041 else
2043 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2044 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2045 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2046 append_pattern_def_seq (stmt_vinfo, def_stmt);
2050 var1 = vect_recog_temp_ssa_var (type, NULL);
2051 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2052 ? LSHIFT_EXPR : RSHIFT_EXPR,
2053 oprnd0, def);
2054 append_pattern_def_seq (stmt_vinfo, def_stmt);
2056 var2 = vect_recog_temp_ssa_var (type, NULL);
2057 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2058 ? RSHIFT_EXPR : LSHIFT_EXPR,
2059 oprnd0, def2);
2060 append_pattern_def_seq (stmt_vinfo, def_stmt);
2062 /* Pattern detected. */
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_NOTE, vect_location,
2065 "vect_recog_rotate_pattern: detected:\n");
2067 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2068 var = vect_recog_temp_ssa_var (type, NULL);
2069 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2071 if (dump_enabled_p ())
2072 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2074 stmts->safe_push (last_stmt);
2075 return pattern_stmt;
2078 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2079 vectorized:
2081 type a_t;
2082 TYPE b_T, res_T;
2084 S1 a_t = ;
2085 S2 b_T = ;
2086 S3 res_T = b_T op a_t;
2088 where type 'TYPE' is a type with different size than 'type',
2089 and op is <<, >> or rotate.
2091 Also detect cases:
2093 type a_t;
2094 TYPE b_T, c_T, res_T;
2096 S0 c_T = ;
2097 S1 a_t = (type) c_T;
2098 S2 b_T = ;
2099 S3 res_T = b_T op a_t;
2101 Input/Output:
2103 * STMTS: Contains a stmt from which the pattern search begins,
2104 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2105 with a shift/rotate which has same type on both operands, in the
2106 second case just b_T op c_T, in the first case with added cast
2107 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2109 Output:
2111 * TYPE_IN: The type of the input arguments to the pattern.
2113 * TYPE_OUT: The type of the output of this pattern.
2115 * Return value: A new stmt that will be used to replace the shift/rotate
2116 S3 stmt. */
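/* Illustrative sketch, not part of the original file (the function name is
   hypothetical): a shift whose count is loaded with a narrower type than
   the shifted value, matching the first form above.  The recognizer adds a
   cast of the count so both operands of the vector shift share one type:

     void
     vec_shift_example (long long *restrict res, const long long *restrict b,
                        const int *restrict amt, int n)
     {
       for (int i = 0; i < n; i++)
         res[i] = b[i] << amt[i];
     }
*/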
2118 static gimple *
2119 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2120 tree *type_in, tree *type_out)
2122 gimple *last_stmt = stmts->pop ();
2123 tree oprnd0, oprnd1, lhs, var;
2124 gimple *pattern_stmt, *def_stmt;
2125 enum tree_code rhs_code;
2126 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2127 vec_info *vinfo = stmt_vinfo->vinfo;
2128 enum vect_def_type dt;
2130 if (!is_gimple_assign (last_stmt))
2131 return NULL;
2133 rhs_code = gimple_assign_rhs_code (last_stmt);
2134 switch (rhs_code)
2136 case LSHIFT_EXPR:
2137 case RSHIFT_EXPR:
2138 case LROTATE_EXPR:
2139 case RROTATE_EXPR:
2140 break;
2141 default:
2142 return NULL;
2145 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2146 return NULL;
2148 lhs = gimple_assign_lhs (last_stmt);
2149 oprnd0 = gimple_assign_rhs1 (last_stmt);
2150 oprnd1 = gimple_assign_rhs2 (last_stmt);
2151 if (TREE_CODE (oprnd0) != SSA_NAME
2152 || TREE_CODE (oprnd1) != SSA_NAME
2153 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2154 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2155 || TYPE_PRECISION (TREE_TYPE (lhs))
2156 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2157 return NULL;
2159 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2160 return NULL;
2162 if (dt != vect_internal_def)
2163 return NULL;
2165 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2166 *type_out = *type_in;
2167 if (*type_in == NULL_TREE)
2168 return NULL;
2170 tree def = NULL_TREE;
2171 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2172 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2174 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2175 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2176 && TYPE_PRECISION (TREE_TYPE (rhs1))
2177 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2179 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2180 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2181 def = rhs1;
2182 else
2184 tree mask
2185 = build_low_bits_mask (TREE_TYPE (rhs1),
2186 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2187 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2188 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2189 stmt_vec_info new_stmt_info
2190 = new_stmt_vec_info (def_stmt, vinfo);
2191 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2192 STMT_VINFO_VECTYPE (new_stmt_info)
2193 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2194 new_pattern_def_seq (stmt_vinfo, def_stmt);
2199 if (def == NULL_TREE)
2201 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2202 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2203 new_pattern_def_seq (stmt_vinfo, def_stmt);
2206 /* Pattern detected. */
2207 if (dump_enabled_p ())
2208 dump_printf_loc (MSG_NOTE, vect_location,
2209 "vect_recog_vector_vector_shift_pattern: detected:\n");
2211 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2212 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2213 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2215 if (dump_enabled_p ())
2216 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2218 stmts->safe_push (last_stmt);
2219 return pattern_stmt;
2222 /* Return true iff the target has a vector optab implementing the operation
2223 CODE on type VECTYPE. */
2225 static bool
2226 target_has_vecop_for_code (tree_code code, tree vectype)
2228 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2229 return voptab
2230 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2233 /* Verify that the target has optabs of VECTYPE to perform all the steps
2234 needed by the multiplication-by-immediate synthesis algorithm described by
2235 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2236 present. Return true iff the target supports all the steps. */
2238 static bool
2239 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2240 tree vectype, bool synth_shift_p)
2242 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2243 return false;
2245 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2246 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2248 if (var == negate_variant
2249 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2250 return false;
2252 /* If we must synthesize shifts with additions make sure that vector
2253 addition is available. */
2254 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2255 return false;
2257 for (int i = 1; i < alg->ops; i++)
2259 switch (alg->op[i])
2261 case alg_shift:
2262 break;
2263 case alg_add_t_m2:
2264 case alg_add_t2_m:
2265 case alg_add_factor:
2266 if (!supports_vplus)
2267 return false;
2268 break;
2269 case alg_sub_t_m2:
2270 case alg_sub_t2_m:
2271 case alg_sub_factor:
2272 if (!supports_vminus)
2273 return false;
2274 break;
2275 case alg_unknown:
2276 case alg_m:
2277 case alg_zero:
2278 case alg_impossible:
2279 return false;
2280 default:
2281 gcc_unreachable ();
2285 return true;
2288 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2289 putting the final result in DEST. Append all statements but the last into
2290 VINFO. Return the last statement. */
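/* For instance, synthesizing a shift by 3 emits three self-additions
   (illustrative pseudo-gimple, the temporaries are hypothetical):

     t1 = op + op;          equivalent to op << 1
     t2 = t1 + t1;          equivalent to op << 2
     dest = t2 + t2;        equivalent to op << 3

   Only the first two are appended to VINFO; the last one is returned.  */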
2292 static gimple *
2293 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2294 stmt_vec_info vinfo)
2296 HOST_WIDE_INT i;
2297 tree itype = TREE_TYPE (op);
2298 tree prev_res = op;
2299 gcc_assert (amnt >= 0);
2300 for (i = 0; i < amnt; i++)
2302 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2303 : dest;
2304 gimple *stmt
2305 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2306 prev_res = tmp_var;
2307 if (i < amnt - 1)
2308 append_pattern_def_seq (vinfo, stmt);
2309 else
2310 return stmt;
2312 gcc_unreachable ();
2313 return NULL;
2316 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2317 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2318 the process if necessary. Append the resulting assignment statements
2319 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2320 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2321 left shifts using additions. */
2323 static tree
2324 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2325 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2327 if (integer_zerop (op2)
2328 && (code == LSHIFT_EXPR
2329 || code == PLUS_EXPR))
2331 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2332 return op1;
2335 gimple *stmt;
2336 tree itype = TREE_TYPE (op1);
2337 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2339 if (code == LSHIFT_EXPR
2340 && synth_shift_p)
2342 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2343 stmt_vinfo);
2344 append_pattern_def_seq (stmt_vinfo, stmt);
2345 return tmp_var;
2348 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2349 append_pattern_def_seq (stmt_vinfo, stmt);
2350 return tmp_var;
2353 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2354 and simple arithmetic operations to be vectorized. Record the statements
2355 produced in STMT_VINFO and return the last statement in the sequence or
2356 NULL if it's not possible to synthesize such a multiplication.
2357 This function mirrors the behavior of expand_mult_const in expmed.c but
2358 works on tree-ssa form. */
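/* Illustrative sketch, not part of the original file: multiplying by 9 can
   be decomposed, as expand_mult_const would, into one shift and one add,
   e.g. the alg_add_t_m2 step handled below:

     t1 = op << 3;              op * 8
     result = op + t1;          op * 9

   so only a vector shift (or additions when SYNTH_SHIFT_P) and a vector
   add are needed, which target_supports_mult_synth_alg checks.  */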
2360 static gimple *
2361 vect_synth_mult_by_constant (tree op, tree val,
2362 stmt_vec_info stmt_vinfo)
2364 tree itype = TREE_TYPE (op);
2365 machine_mode mode = TYPE_MODE (itype);
2366 struct algorithm alg;
2367 mult_variant variant;
2368 if (!tree_fits_shwi_p (val))
2369 return NULL;
2371 /* Multiplication synthesis by shifts, adds and subs can introduce
2372 signed overflow where the original operation didn't. Perform the
2373 operations on an unsigned type and cast back to avoid this.
2374 In the future we may want to relax this for synthesis algorithms
2375 that we can prove do not cause unexpected overflow. */
2376 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2378 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2380 /* Targets that don't support vector shifts but support vector additions
2381 can synthesize shifts that way. */
2382 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2384 HOST_WIDE_INT hwval = tree_to_shwi (val);
2385 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2386 The vectorizer's benefit analysis will decide whether it's beneficial
2387 to do this. */
2388 bool possible = choose_mult_variant (mode, hwval, &alg,
2389 &variant, MAX_COST);
2390 if (!possible)
2391 return NULL;
2393 tree vectype = get_vectype_for_scalar_type (multtype);
2395 if (!vectype
2396 || !target_supports_mult_synth_alg (&alg, variant,
2397 vectype, synth_shift_p))
2398 return NULL;
2400 tree accumulator;
2402 /* Clear out the sequence of statements so we can populate it below. */
2403 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2404 gimple *stmt = NULL;
2406 if (cast_to_unsigned_p)
2408 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2409 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2410 append_pattern_def_seq (stmt_vinfo, stmt);
2411 op = tmp_op;
2414 if (alg.op[0] == alg_zero)
2415 accumulator = build_int_cst (multtype, 0);
2416 else
2417 accumulator = op;
2419 bool needs_fixup = (variant == negate_variant)
2420 || (variant == add_variant);
2422 for (int i = 1; i < alg.ops; i++)
2424 tree shft_log = build_int_cst (multtype, alg.log[i]);
2425 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2426 tree tmp_var = NULL_TREE;
2428 switch (alg.op[i])
2430 case alg_shift:
2431 if (synth_shift_p)
2432 stmt
2433 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2434 stmt_vinfo);
2435 else
2436 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2437 shft_log);
2438 break;
2439 case alg_add_t_m2:
2440 tmp_var
2441 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2442 stmt_vinfo, synth_shift_p);
2443 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2444 tmp_var);
2445 break;
2446 case alg_sub_t_m2:
2447 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2448 shft_log, stmt_vinfo,
2449 synth_shift_p);
2450 /* In some algorithms the first step involves zeroing the
2451 accumulator. If subtracting from such an accumulator
2452 just emit the negation directly. */
2453 if (integer_zerop (accumulator))
2454 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2455 else
2456 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2457 tmp_var);
2458 break;
2459 case alg_add_t2_m:
2460 tmp_var
2461 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2462 stmt_vinfo, synth_shift_p);
2463 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2464 break;
2465 case alg_sub_t2_m:
2466 tmp_var
2467 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2468 stmt_vinfo, synth_shift_p);
2469 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2470 break;
2471 case alg_add_factor:
2472 tmp_var
2473 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2474 stmt_vinfo, synth_shift_p);
2475 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2476 tmp_var);
2477 break;
2478 case alg_sub_factor:
2479 tmp_var
2480 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2481 stmt_vinfo, synth_shift_p);
2482 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2483 accumulator);
2484 break;
2485 default:
2486 gcc_unreachable ();
2488 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2489 but rather return it directly. */
2491 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2492 append_pattern_def_seq (stmt_vinfo, stmt);
2493 accumulator = accum_tmp;
2495 if (variant == negate_variant)
2497 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2498 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2499 accumulator = accum_tmp;
2500 if (cast_to_unsigned_p)
2501 append_pattern_def_seq (stmt_vinfo, stmt);
2503 else if (variant == add_variant)
2505 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2506 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2507 accumulator = accum_tmp;
2508 if (cast_to_unsigned_p)
2509 append_pattern_def_seq (stmt_vinfo, stmt);
2511 /* Move back to a signed if needed. */
2512 if (cast_to_unsigned_p)
2514 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2515 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2518 return stmt;
2521 /* Detect multiplication by constant and convert it into a sequence of
2522    shifts, additions, subtractions and negations.  We reuse the
2523    choose_mult_variant algorithms from expmed.c.
2525 Input/Output:
2527 STMTS: Contains a stmt from which the pattern search begins,
2528 i.e. the mult stmt.
2530 Output:
2532 * TYPE_IN: The type of the input arguments to the pattern.
2534 * TYPE_OUT: The type of the output of this pattern.
2536 * Return value: A new stmt that will be used to replace
2537 the multiplication. */
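/* Illustrative sketch, not part of the original file (hypothetical names):
   a loop this recognizer rewrites when the target has no vector multiply,
   e.g. by synthesizing b[i] * 5 as b[i] + (b[i] << 2):

     void
     mult_example (int *restrict a, const int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] * 5;
     }
*/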
2539 static gimple *
2540 vect_recog_mult_pattern (vec<gimple *> *stmts,
2541 tree *type_in, tree *type_out)
2543 gimple *last_stmt = stmts->pop ();
2544 tree oprnd0, oprnd1, vectype, itype;
2545 gimple *pattern_stmt;
2546 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2548 if (!is_gimple_assign (last_stmt))
2549 return NULL;
2551 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2552 return NULL;
2554 oprnd0 = gimple_assign_rhs1 (last_stmt);
2555 oprnd1 = gimple_assign_rhs2 (last_stmt);
2556 itype = TREE_TYPE (oprnd0);
2558 if (TREE_CODE (oprnd0) != SSA_NAME
2559 || TREE_CODE (oprnd1) != INTEGER_CST
2560 || !INTEGRAL_TYPE_P (itype)
2561 || !type_has_mode_precision_p (itype))
2562 return NULL;
2564 vectype = get_vectype_for_scalar_type (itype);
2565 if (vectype == NULL_TREE)
2566 return NULL;
2568 /* If the target can handle vectorized multiplication natively,
2569 don't attempt to optimize this. */
2570 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2571 if (mul_optab != unknown_optab)
2573 machine_mode vec_mode = TYPE_MODE (vectype);
2574 int icode = (int) optab_handler (mul_optab, vec_mode);
2575 if (icode != CODE_FOR_nothing)
2576 return NULL;
2579 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2580 if (!pattern_stmt)
2581 return NULL;
2583 /* Pattern detected. */
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_NOTE, vect_location,
2586 "vect_recog_mult_pattern: detected:\n");
2588 if (dump_enabled_p ())
2589 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2590 pattern_stmt,0);
2592 stmts->safe_push (last_stmt);
2593 *type_in = vectype;
2594 *type_out = vectype;
2596 return pattern_stmt;
2599 /* Detect a signed division by a constant that wouldn't be
2600 otherwise vectorized:
2602 type a_t, b_t;
2604 S1 a_t = b_t / N;
2606 where type 'type' is an integral type and N is a constant.
2608 Similarly handle modulo by a constant:
2610 S4 a_t = b_t % N;
2612 Input/Output:
2614 * STMTS: Contains a stmt from which the pattern search begins,
2615      i.e. the division stmt.  If N is a power of two constant and the
2616      type is signed, S1 is replaced by:
2617 S3 y_t = b_t < 0 ? N - 1 : 0;
2618 S2 x_t = b_t + y_t;
2619 S1' a_t = x_t >> log2 (N);
2621    Likewise, if N is a power of two constant and the type is signed,
2622    S4 is replaced by (where *_T temporaries have unsigned type):
2623 S9 y_T = b_t < 0 ? -1U : 0U;
2624 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2625 S7 z_t = (type) z_T;
2626 S6 w_t = b_t + z_t;
2627 S5 x_t = w_t & (N - 1);
2628 S4' a_t = x_t - z_t;
2630 Output:
2632 * TYPE_IN: The type of the input arguments to the pattern.
2634 * TYPE_OUT: The type of the output of this pattern.
2636 * Return value: A new stmt that will be used to replace the division
2637 S1 or modulo S4 stmt. */
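/* Illustrative sketch, not part of the original file: with N = 4 and a
   signed 32-bit type, S1 above becomes, per the power-of-two branch below,

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;          arithmetic shift, log2 (4) == 2

   which truncates towards zero for negative b_t exactly as the original
   division does.  */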
2639 static gimple *
2640 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2641 tree *type_in, tree *type_out)
2643 gimple *last_stmt = stmts->pop ();
2644 tree oprnd0, oprnd1, vectype, itype, cond;
2645 gimple *pattern_stmt, *def_stmt;
2646 enum tree_code rhs_code;
2647 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2648 vec_info *vinfo = stmt_vinfo->vinfo;
2649 optab optab;
2650 tree q;
2651 int dummy_int, prec;
2652 stmt_vec_info def_stmt_vinfo;
2654 if (!is_gimple_assign (last_stmt))
2655 return NULL;
2657 rhs_code = gimple_assign_rhs_code (last_stmt);
2658 switch (rhs_code)
2660 case TRUNC_DIV_EXPR:
2661 case TRUNC_MOD_EXPR:
2662 break;
2663 default:
2664 return NULL;
2667 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2668 return NULL;
2670 oprnd0 = gimple_assign_rhs1 (last_stmt);
2671 oprnd1 = gimple_assign_rhs2 (last_stmt);
2672 itype = TREE_TYPE (oprnd0);
2673 if (TREE_CODE (oprnd0) != SSA_NAME
2674 || TREE_CODE (oprnd1) != INTEGER_CST
2675 || TREE_CODE (itype) != INTEGER_TYPE
2676 || !type_has_mode_precision_p (itype))
2677 return NULL;
2679 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2680 vectype = get_vectype_for_scalar_type (itype);
2681 if (vectype == NULL_TREE)
2682 return NULL;
2684 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2686 /* If the target can handle vectorized division or modulo natively,
2687 don't attempt to optimize this, since native division is likely
2688 to give smaller code. */
2689 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2690 if (optab != unknown_optab)
2692 machine_mode vec_mode = TYPE_MODE (vectype);
2693 int icode = (int) optab_handler (optab, vec_mode);
2694 if (icode != CODE_FOR_nothing)
2695 return NULL;
2699 prec = TYPE_PRECISION (itype);
2700 if (integer_pow2p (oprnd1))
2702 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2703 return NULL;
2705 /* Pattern detected. */
2706 if (dump_enabled_p ())
2707 dump_printf_loc (MSG_NOTE, vect_location,
2708 "vect_recog_divmod_pattern: detected:\n");
2710 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2711 build_int_cst (itype, 0));
2712 if (rhs_code == TRUNC_DIV_EXPR)
2714 tree var = vect_recog_temp_ssa_var (itype, NULL);
2715 tree shift;
2716 def_stmt
2717 = gimple_build_assign (var, COND_EXPR, cond,
2718 fold_build2 (MINUS_EXPR, itype, oprnd1,
2719 build_int_cst (itype, 1)),
2720 build_int_cst (itype, 0));
2721 new_pattern_def_seq (stmt_vinfo, def_stmt);
2722 var = vect_recog_temp_ssa_var (itype, NULL);
2723 def_stmt
2724 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2725 gimple_assign_lhs (def_stmt));
2726 append_pattern_def_seq (stmt_vinfo, def_stmt);
2728 shift = build_int_cst (itype, tree_log2 (oprnd1));
2729 pattern_stmt
2730 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2731 RSHIFT_EXPR, var, shift);
2733 else
2735 tree signmask;
2736 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2737 if (compare_tree_int (oprnd1, 2) == 0)
2739 signmask = vect_recog_temp_ssa_var (itype, NULL);
2740 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2741 build_int_cst (itype, 1),
2742 build_int_cst (itype, 0));
2743 append_pattern_def_seq (stmt_vinfo, def_stmt);
2745 else
2747 tree utype
2748 = build_nonstandard_integer_type (prec, 1);
2749 tree vecutype = get_vectype_for_scalar_type (utype);
2750 tree shift
2751 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2752 - tree_log2 (oprnd1));
2753 tree var = vect_recog_temp_ssa_var (utype, NULL);
2755 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2756 build_int_cst (utype, -1),
2757 build_int_cst (utype, 0));
2758 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2759 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2760 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2762 var = vect_recog_temp_ssa_var (utype, NULL);
2763 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2764 gimple_assign_lhs (def_stmt),
2765 shift);
2766 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2767 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2768 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2769 append_pattern_def_seq (stmt_vinfo, def_stmt);
2770 signmask = vect_recog_temp_ssa_var (itype, NULL);
2771 def_stmt
2772 = gimple_build_assign (signmask, NOP_EXPR, var);
2773 append_pattern_def_seq (stmt_vinfo, def_stmt);
2775 def_stmt
2776 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2777 PLUS_EXPR, oprnd0, signmask);
2778 append_pattern_def_seq (stmt_vinfo, def_stmt);
2779 def_stmt
2780 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2781 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2782 fold_build2 (MINUS_EXPR, itype, oprnd1,
2783 build_int_cst (itype, 1)));
2784 append_pattern_def_seq (stmt_vinfo, def_stmt);
2786 pattern_stmt
2787 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2788 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2789 signmask);
2792 if (dump_enabled_p ())
2793 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2796 stmts->safe_push (last_stmt);
2798 *type_in = vectype;
2799 *type_out = vectype;
2800 return pattern_stmt;
2803 if (prec > HOST_BITS_PER_WIDE_INT
2804 || integer_zerop (oprnd1))
2805 return NULL;
2807 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2808 return NULL;
2810 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2812 if (TYPE_UNSIGNED (itype))
2814 unsigned HOST_WIDE_INT mh, ml;
2815 int pre_shift, post_shift;
2816 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2817 & GET_MODE_MASK (itype_mode));
2818 tree t1, t2, t3, t4;
2820 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2821 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2822 return NULL;
2824 /* Find a suitable multiplier and right shift count
2825 instead of multiplying with D. */
2826 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2828 /* If the suggested multiplier is more than SIZE bits, we can do better
2829 for even divisors, using an initial right shift. */
2830 if (mh != 0 && (d & 1) == 0)
2832 pre_shift = ctz_or_zero (d);
2833 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2834 &ml, &post_shift, &dummy_int);
2835 gcc_assert (!mh);
2837 else
2838 pre_shift = 0;
2840 if (mh != 0)
2842 if (post_shift - 1 >= prec)
2843 return NULL;
2845 /* t1 = oprnd0 h* ml;
2846 t2 = oprnd0 - t1;
2847 t3 = t2 >> 1;
2848 t4 = t1 + t3;
2849 q = t4 >> (post_shift - 1); */
2850 t1 = vect_recog_temp_ssa_var (itype, NULL);
2851 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2852 build_int_cst (itype, ml));
2853 append_pattern_def_seq (stmt_vinfo, def_stmt);
2855 t2 = vect_recog_temp_ssa_var (itype, NULL);
2856 def_stmt
2857 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2858 append_pattern_def_seq (stmt_vinfo, def_stmt);
2860 t3 = vect_recog_temp_ssa_var (itype, NULL);
2861 def_stmt
2862 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2863 append_pattern_def_seq (stmt_vinfo, def_stmt);
2865 t4 = vect_recog_temp_ssa_var (itype, NULL);
2866 def_stmt
2867 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2869 if (post_shift != 1)
2871 append_pattern_def_seq (stmt_vinfo, def_stmt);
2873 q = vect_recog_temp_ssa_var (itype, NULL);
2874 pattern_stmt
2875 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2876 build_int_cst (itype, post_shift - 1));
2878 else
2880 q = t4;
2881 pattern_stmt = def_stmt;
2884 else
2886 if (pre_shift >= prec || post_shift >= prec)
2887 return NULL;
2889 /* t1 = oprnd0 >> pre_shift;
2890 t2 = t1 h* ml;
2891 q = t2 >> post_shift; */
2892 if (pre_shift)
2894 t1 = vect_recog_temp_ssa_var (itype, NULL);
2895 def_stmt
2896 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2897 build_int_cst (NULL, pre_shift));
2898 append_pattern_def_seq (stmt_vinfo, def_stmt);
2900 else
2901 t1 = oprnd0;
2903 t2 = vect_recog_temp_ssa_var (itype, NULL);
2904 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2905 build_int_cst (itype, ml));
2907 if (post_shift)
2909 append_pattern_def_seq (stmt_vinfo, def_stmt);
2911 q = vect_recog_temp_ssa_var (itype, NULL);
2912 def_stmt
2913 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2914 build_int_cst (itype, post_shift));
2916 else
2917 q = t2;
2919 pattern_stmt = def_stmt;
2922 else
2924 unsigned HOST_WIDE_INT ml;
2925 int post_shift;
2926 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2927 unsigned HOST_WIDE_INT abs_d;
2928 bool add = false;
2929 tree t1, t2, t3, t4;
2931 /* Give up for -1. */
2932 if (d == -1)
2933 return NULL;
2935 /* Since d might be INT_MIN, we have to cast to
2936 unsigned HOST_WIDE_INT before negating to avoid
2937 undefined signed overflow. */
2938 abs_d = (d >= 0
2939 ? (unsigned HOST_WIDE_INT) d
2940 : - (unsigned HOST_WIDE_INT) d);
2942 /* n rem d = n rem -d */
2943 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2945 d = abs_d;
2946 oprnd1 = build_int_cst (itype, abs_d);
2948 else if (HOST_BITS_PER_WIDE_INT >= prec
2949 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2950 /* This case is not handled correctly below. */
2951 return NULL;
2953 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2954 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2956 add = true;
2957 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2959 if (post_shift >= prec)
2960 return NULL;
2962 /* t1 = oprnd0 h* ml; */
2963 t1 = vect_recog_temp_ssa_var (itype, NULL);
2964 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2965 build_int_cst (itype, ml));
2967 if (add)
2969 /* t2 = t1 + oprnd0; */
2970 append_pattern_def_seq (stmt_vinfo, def_stmt);
2971 t2 = vect_recog_temp_ssa_var (itype, NULL);
2972 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2974 else
2975 t2 = t1;
2977 if (post_shift)
2979 /* t3 = t2 >> post_shift; */
2980 append_pattern_def_seq (stmt_vinfo, def_stmt);
2981 t3 = vect_recog_temp_ssa_var (itype, NULL);
2982 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2983 build_int_cst (itype, post_shift));
2985 else
2986 t3 = t2;
2988 wide_int oprnd0_min, oprnd0_max;
2989 int msb = 1;
2990 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2992 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2993 msb = 0;
2994 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2995 msb = -1;
2998 if (msb == 0 && d >= 0)
3000 /* q = t3; */
3001 q = t3;
3002 pattern_stmt = def_stmt;
3004 else
3006 /* t4 = oprnd0 >> (prec - 1);
3007 or if we know from VRP that oprnd0 >= 0
3008 t4 = 0;
3009 or if we know from VRP that oprnd0 < 0
3010 t4 = -1; */
3011 append_pattern_def_seq (stmt_vinfo, def_stmt);
3012 t4 = vect_recog_temp_ssa_var (itype, NULL);
3013 if (msb != 1)
3014 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3015 build_int_cst (itype, msb));
3016 else
3017 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3018 build_int_cst (itype, prec - 1));
3019 append_pattern_def_seq (stmt_vinfo, def_stmt);
3021 /* q = t3 - t4; or q = t4 - t3; */
3022 q = vect_recog_temp_ssa_var (itype, NULL);
3023 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3024 d < 0 ? t3 : t4);
3028 if (rhs_code == TRUNC_MOD_EXPR)
3030 tree r, t1;
3032 /* We divided. Now finish by:
3033 t1 = q * oprnd1;
3034 r = oprnd0 - t1; */
3035 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3037 t1 = vect_recog_temp_ssa_var (itype, NULL);
3038 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3039 append_pattern_def_seq (stmt_vinfo, def_stmt);
3041 r = vect_recog_temp_ssa_var (itype, NULL);
3042 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3045 /* Pattern detected. */
3046 if (dump_enabled_p ())
3048 dump_printf_loc (MSG_NOTE, vect_location,
3049 "vect_recog_divmod_pattern: detected: ");
3050 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3053 stmts->safe_push (last_stmt);
3055 *type_in = vectype;
3056 *type_out = vectype;
3057 return pattern_stmt;
3060 /* Function vect_recog_mixed_size_cond_pattern
3062 Try to find the following pattern:
3064 type x_t, y_t;
3065 TYPE a_T, b_T, c_T;
3066 loop:
3067 S1 a_T = x_t CMP y_t ? b_T : c_T;
3069 where type 'TYPE' is an integral type which has different size
3070 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3071 than 'type', the constants need to fit into an integer type
3072 with the same width as 'type') or results of conversion from 'type'.
3074 Input:
3076 * LAST_STMT: A stmt from which the pattern search begins.
3078 Output:
3080 * TYPE_IN: The type of the input arguments to the pattern.
3082 * TYPE_OUT: The type of the output of this pattern.
3084 * Return value: A new stmt that will be used to replace the pattern.
3085 Additionally a def_stmt is added.
3087 a_it = x_t CMP y_t ? b_it : c_it;
3088 a_T = (TYPE) a_it; */
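/* Illustrative sketch, not part of the original file (hypothetical names):
   a COND_EXPR whose comparison operands and result differ in width, as in
   S1 above.  Assuming 16-bit 'short' and 32-bit 'int', the select below is
   done in the short-sized intermediate type and then widened:

     void
     cond_example (int *restrict r, const short *restrict x,
                   const short *restrict y, int n)
     {
       for (int i = 0; i < n; i++)
         r[i] = x[i] < y[i] ? 5 : 17;
     }
*/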
3090 static gimple *
3091 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3092 tree *type_out)
3094 gimple *last_stmt = (*stmts)[0];
3095 tree cond_expr, then_clause, else_clause;
3096 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3097 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3098 gimple *pattern_stmt, *def_stmt;
3099 vec_info *vinfo = stmt_vinfo->vinfo;
3100 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3101 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3102 bool promotion;
3103 tree comp_scalar_type;
3105 if (!is_gimple_assign (last_stmt)
3106 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3107 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3108 return NULL;
3110 cond_expr = gimple_assign_rhs1 (last_stmt);
3111 then_clause = gimple_assign_rhs2 (last_stmt);
3112 else_clause = gimple_assign_rhs3 (last_stmt);
3114 if (!COMPARISON_CLASS_P (cond_expr))
3115 return NULL;
3117 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3118 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3119 if (comp_vectype == NULL_TREE)
3120 return NULL;
3122 type = gimple_expr_type (last_stmt);
3123 if (types_compatible_p (type, comp_scalar_type)
3124 || ((TREE_CODE (then_clause) != INTEGER_CST
3125 || TREE_CODE (else_clause) != INTEGER_CST)
3126 && !INTEGRAL_TYPE_P (comp_scalar_type))
3127 || !INTEGRAL_TYPE_P (type))
3128 return NULL;
3130 if ((TREE_CODE (then_clause) != INTEGER_CST
3131 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3132 &def_stmt0, &promotion))
3133 || (TREE_CODE (else_clause) != INTEGER_CST
3134 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3135 &def_stmt1, &promotion)))
3136 return NULL;
3138 if (orig_type0 && orig_type1
3139 && !types_compatible_p (orig_type0, orig_type1))
3140 return NULL;
3142 if (orig_type0)
3144 if (!types_compatible_p (orig_type0, comp_scalar_type))
3145 return NULL;
3146 then_clause = gimple_assign_rhs1 (def_stmt0);
3147 itype = orig_type0;
3150 if (orig_type1)
3152 if (!types_compatible_p (orig_type1, comp_scalar_type))
3153 return NULL;
3154 else_clause = gimple_assign_rhs1 (def_stmt1);
3155 itype = orig_type1;
3159 HOST_WIDE_INT cmp_mode_size
3160 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3162 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3163 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3164 return NULL;
3166 vectype = get_vectype_for_scalar_type (type);
3167 if (vectype == NULL_TREE)
3168 return NULL;
3170 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3171 return NULL;
3173 if (itype == NULL_TREE)
3174 itype = build_nonstandard_integer_type (cmp_mode_size,
3175 TYPE_UNSIGNED (type));
3177 if (itype == NULL_TREE
3178 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3179 return NULL;
3181 vecitype = get_vectype_for_scalar_type (itype);
3182 if (vecitype == NULL_TREE)
3183 return NULL;
3185 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3186 return NULL;
3188 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3190 if ((TREE_CODE (then_clause) == INTEGER_CST
3191 && !int_fits_type_p (then_clause, itype))
3192 || (TREE_CODE (else_clause) == INTEGER_CST
3193 && !int_fits_type_p (else_clause, itype)))
3194 return NULL;
3197 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3198 COND_EXPR, unshare_expr (cond_expr),
3199 fold_convert (itype, then_clause),
3200 fold_convert (itype, else_clause));
3201 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3202 NOP_EXPR, gimple_assign_lhs (def_stmt));
3204 new_pattern_def_seq (stmt_vinfo, def_stmt);
3205 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3206 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3207 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3208 *type_in = vecitype;
3209 *type_out = vectype;
3211 if (dump_enabled_p ())
3212 dump_printf_loc (MSG_NOTE, vect_location,
3213 "vect_recog_mixed_size_cond_pattern: detected:\n");
3215 return pattern_stmt;
3219 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3220 true if bool VAR can and should be optimized that way. Assume it shouldn't
3221 in case it's a result of a comparison which can be directly vectorized into
3222 a vector comparison. Fills in STMTS with all stmts visited during the
3223 walk. */
3225 static bool
3226 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3228 gimple *def_stmt;
3229 enum vect_def_type dt;
3230 tree rhs1;
3231 enum tree_code rhs_code;
3233 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3234 return false;
3236 if (dt != vect_internal_def)
3237 return false;
3239 if (!is_gimple_assign (def_stmt))
3240 return false;
3242 if (stmts.contains (def_stmt))
3243 return true;
3245 rhs1 = gimple_assign_rhs1 (def_stmt);
3246 rhs_code = gimple_assign_rhs_code (def_stmt);
3247 switch (rhs_code)
3249 case SSA_NAME:
3250 if (! check_bool_pattern (rhs1, vinfo, stmts))
3251 return false;
3252 break;
3254 CASE_CONVERT:
3255 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3256 return false;
3257 if (! check_bool_pattern (rhs1, vinfo, stmts))
3258 return false;
3259 break;
3261 case BIT_NOT_EXPR:
3262 if (! check_bool_pattern (rhs1, vinfo, stmts))
3263 return false;
3264 break;
3266 case BIT_AND_EXPR:
3267 case BIT_IOR_EXPR:
3268 case BIT_XOR_EXPR:
3269 if (! check_bool_pattern (rhs1, vinfo, stmts)
3270 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3271 return false;
3272 break;
3274 default:
3275 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3277 tree vecitype, comp_vectype;
3279 /* If the comparison can throw, then is_gimple_condexpr will be
3280 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3281 if (stmt_could_throw_p (def_stmt))
3282 return false;
3284 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3285 if (comp_vectype == NULL_TREE)
3286 return false;
3288 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3289 if (mask_type
3290 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3291 return false;
3293 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3295 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3296 tree itype
3297 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3298 vecitype = get_vectype_for_scalar_type (itype);
3299 if (vecitype == NULL_TREE)
3300 return false;
3302 else
3303 vecitype = comp_vectype;
3304 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3305 return false;
3307 else
3308 return false;
3309 break;
3312 bool res = stmts.add (def_stmt);
3313 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3314 gcc_assert (!res);
3316 return true;
3320 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE of the value
3321    defined by a previous stmt (SSA_NAME_DEF_STMT of VAR), appending the cast
3322    to STMT_INFO's pattern sequence.  */
3324 static tree
3325 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3327 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3328 NOP_EXPR, var);
3329 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3330 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3331 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3332 append_pattern_def_seq (stmt_info, cast_stmt);
3333 return gimple_assign_lhs (cast_stmt);
3336 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3337 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3338 type, OUT_TYPE is the desired final integer type of the whole pattern.
3339    STMT_INFO is the info of the pattern root, which the pattern stmts should
3340    be associated with.  DEFS is a map of pattern defs.  */
3342 static void
3343 adjust_bool_pattern (tree var, tree out_type,
3344 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3346 gimple *stmt = SSA_NAME_DEF_STMT (var);
3347 enum tree_code rhs_code, def_rhs_code;
3348 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3349 location_t loc;
3350 gimple *pattern_stmt, *def_stmt;
3351 tree trueval = NULL_TREE;
3353 rhs1 = gimple_assign_rhs1 (stmt);
3354 rhs2 = gimple_assign_rhs2 (stmt);
3355 rhs_code = gimple_assign_rhs_code (stmt);
3356 loc = gimple_location (stmt);
3357 switch (rhs_code)
3359 case SSA_NAME:
3360 CASE_CONVERT:
3361 irhs1 = *defs.get (rhs1);
3362 itype = TREE_TYPE (irhs1);
3363 pattern_stmt
3364 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3365 SSA_NAME, irhs1);
3366 break;
3368 case BIT_NOT_EXPR:
3369 irhs1 = *defs.get (rhs1);
3370 itype = TREE_TYPE (irhs1);
3371 pattern_stmt
3372 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3373 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3374 break;
3376 case BIT_AND_EXPR:
3377 /* Try to optimize x = y & (a < b ? 1 : 0); into
3378 x = (a < b ? y : 0);
3380 E.g. for:
3381 bool a_b, b_b, c_b;
3382 TYPE d_T;
3384 S1 a_b = x1 CMP1 y1;
3385 S2 b_b = x2 CMP2 y2;
3386 S3 c_b = a_b & b_b;
3387 S4 d_T = (TYPE) c_b;
3389 we would normally emit:
3391 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3392 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3393 S3' c_T = a_T & b_T;
3394 S4' d_T = c_T;
3396 but we can save one stmt by using the
3397 result of one of the COND_EXPRs in the other COND_EXPR and leave
3398 BIT_AND_EXPR stmt out:
3400 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3401 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3402 S4' f_T = c_T;
3404         At least when VEC_COND_EXPR is implemented using masks,
3405         cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases the
3406         comparison mask is computed and then ANDed, in one case with an
3407         all-ones vector, in the other case with a vector register.
3408         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3409         often more expensive.  */
3410 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3411 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3412 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3414 irhs1 = *defs.get (rhs1);
3415 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3416 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3417 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3419 rhs_code = def_rhs_code;
3420 rhs1 = def_rhs1;
3421 rhs2 = gimple_assign_rhs2 (def_stmt);
3422 trueval = irhs1;
3423 goto do_compare;
3425 else
3426 irhs2 = *defs.get (rhs2);
3427 goto and_ior_xor;
3429 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3430 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3431 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3433 irhs2 = *defs.get (rhs2);
3434 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3435 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3436 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3438 rhs_code = def_rhs_code;
3439 rhs1 = def_rhs1;
3440 rhs2 = gimple_assign_rhs2 (def_stmt);
3441 trueval = irhs2;
3442 goto do_compare;
3444 else
3445 irhs1 = *defs.get (rhs1);
3446 goto and_ior_xor;
3448 /* FALLTHRU */
3449 case BIT_IOR_EXPR:
3450 case BIT_XOR_EXPR:
3451 irhs1 = *defs.get (rhs1);
3452 irhs2 = *defs.get (rhs2);
3453 and_ior_xor:
3454 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3455 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3457 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3458 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3459 int out_prec = TYPE_PRECISION (out_type);
3460 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3461 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3462 stmt_info);
3463 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3464 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3465 stmt_info);
3466 else
3468 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3469 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3472 itype = TREE_TYPE (irhs1);
3473 pattern_stmt
3474 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3475 rhs_code, irhs1, irhs2);
3476 break;
3478 default:
3479 do_compare:
3480 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3481 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3482 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3483 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3484 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3486 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3487 itype
3488 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3490 else
3491 itype = TREE_TYPE (rhs1);
3492 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3493 if (trueval == NULL_TREE)
3494 trueval = build_int_cst (itype, 1);
3495 else
3496 gcc_checking_assert (useless_type_conversion_p (itype,
3497 TREE_TYPE (trueval)));
3498 pattern_stmt
3499 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3500 COND_EXPR, cond_expr, trueval,
3501 build_int_cst (itype, 0));
3502 break;
3505 gimple_set_location (pattern_stmt, loc);
3506 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3507 pattern def seq stmts instead of just letting auto-detection do
3508 its work? */
3509 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3510 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3511 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3512 append_pattern_def_seq (stmt_info, pattern_stmt);
3513 defs.put (var, gimple_assign_lhs (pattern_stmt));
3516 /* Comparison function to qsort a vector of gimple stmts by UID.  */
3518 static int
3519 sort_after_uid (const void *p1, const void *p2)
3521 const gimple *stmt1 = *(const gimple * const *)p1;
3522 const gimple *stmt2 = *(const gimple * const *)p2;
3523 return gimple_uid (stmt1) - gimple_uid (stmt2);
3526 /* Create pattern stmts for all stmts participating in the bool pattern
3527 specified by BOOL_STMT_SET and its root STMT with the desired type
3528 OUT_TYPE. Return the def of the pattern root. */
3530 static tree
3531 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3532 tree out_type, gimple *stmt)
3534 /* Gather original stmts in the bool pattern in their order of appearance
3535 in the IL. */
3536 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3537 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3538 i != bool_stmt_set.end (); ++i)
3539 bool_stmts.quick_push (*i);
3540 bool_stmts.qsort (sort_after_uid);
3542 /* Now process them in that order, producing pattern stmts. */
3543 hash_map <tree, tree> defs;
3544 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3545 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3546 out_type, vinfo_for_stmt (stmt), defs);
3548 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3549 gimple *pattern_stmt
3550 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3551 return gimple_assign_lhs (pattern_stmt);
3554 /* Helper for search_type_for_mask. */
3556 static tree
3557 search_type_for_mask_1 (tree var, vec_info *vinfo,
3558 hash_map<gimple *, tree> &cache)
3560 gimple *def_stmt;
3561 enum vect_def_type dt;
3562 tree rhs1;
3563 enum tree_code rhs_code;
3564 tree res = NULL_TREE, res2;
3566 if (TREE_CODE (var) != SSA_NAME)
3567 return NULL_TREE;
3569 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3570 return NULL_TREE;
3572 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3573 return NULL_TREE;
3575 if (dt != vect_internal_def)
3576 return NULL_TREE;
3578 if (!is_gimple_assign (def_stmt))
3579 return NULL_TREE;
3581 tree *c = cache.get (def_stmt);
3582 if (c)
3583 return *c;
3585 rhs_code = gimple_assign_rhs_code (def_stmt);
3586 rhs1 = gimple_assign_rhs1 (def_stmt);
3588 switch (rhs_code)
3590 case SSA_NAME:
3591 case BIT_NOT_EXPR:
3592 CASE_CONVERT:
3593 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3594 break;
3596 case BIT_AND_EXPR:
3597 case BIT_IOR_EXPR:
3598 case BIT_XOR_EXPR:
3599 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3600 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3601 cache);
3602 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3603 res = res2;
3604 break;
3606 default:
3607 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3609 tree comp_vectype, mask_type;
3611 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3613 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3614 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3615 vinfo, cache);
3616 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3617 res = res2;
3618 break;
3621 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3622 if (comp_vectype == NULL_TREE)
3624 res = NULL_TREE;
3625 break;
3628 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3629 if (!mask_type
3630 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3632 res = NULL_TREE;
3633 break;
3636 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3637 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3639 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3640 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3642 else
3643 res = TREE_TYPE (rhs1);
3647 cache.put (def_stmt, res);
3648 return res;
3651 /* Return the proper type for converting bool VAR into
3652 an integer value or NULL_TREE if no such type exists.
3653 The type is chosen so that converted value has the
3654 same number of elements as VAR's vector type. */
3656 static tree
3657 search_type_for_mask (tree var, vec_info *vinfo)
3659 hash_map<gimple *, tree> cache;
3660 return search_type_for_mask_1 (var, vinfo, cache);
3663 /* Function vect_recog_bool_pattern
3665 Try to find pattern like following:
3667 bool a_b, b_b, c_b, d_b, e_b;
3668 TYPE f_T;
3669 loop:
3670 S1 a_b = x1 CMP1 y1;
3671 S2 b_b = x2 CMP2 y2;
3672 S3 c_b = a_b & b_b;
3673 S4 d_b = x3 CMP3 y3;
3674 S5 e_b = c_b | d_b;
3675 S6 f_T = (TYPE) e_b;
3677 where type 'TYPE' is an integral type. Or a similar pattern
3678 ending in
3680 S6 f_Y = e_b ? r_Y : s_Y;
3682 as results from if-conversion of a complex condition.
3684 Input:
3686 * LAST_STMT: A stmt at the end from which the pattern
3687 search begins, i.e. cast of a bool to
3688 an integer type.
3690 Output:
3692 * TYPE_IN: The type of the input arguments to the pattern.
3694 * TYPE_OUT: The type of the output of this pattern.
3696 * Return value: A new stmt that will be used to replace the pattern.
3698         Assuming the size of TYPE is the same as the size of all comparisons
3699         (otherwise some casts would be added where needed), for the above
3700         sequence we create related pattern stmts:
3701 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3702 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3703 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3704 S5' e_T = c_T | d_T;
3705 S6' f_T = e_T;
3707 Instead of the above S3' we could emit:
3708 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3709 S3' c_T = a_T | b_T;
3710 but the above is more efficient. */
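/* Illustrative sketch, not part of the original file (hypothetical names):
   after if-conversion the '&&' below becomes a BIT_AND_EXPR of two bools
   and the store becomes a bool-to-int cast, a shorter instance of the
   S1..S6 chain above (one AND instead of the AND plus OR), which this
   recognizer turns into the COND_EXPR based S1'..S6' form:

     void
     bool_example (int *restrict c, const int *restrict x1,
                   const int *restrict y1, const int *restrict x2,
                   const int *restrict y2, int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = x1[i] < y1[i] && x2[i] > y2[i];
     }
*/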
3712 static gimple *
3713 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3714 tree *type_out)
3716 gimple *last_stmt = stmts->pop ();
3717 enum tree_code rhs_code;
3718 tree var, lhs, rhs, vectype;
3719 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3720 stmt_vec_info new_stmt_info;
3721 vec_info *vinfo = stmt_vinfo->vinfo;
3722 gimple *pattern_stmt;
3724 if (!is_gimple_assign (last_stmt))
3725 return NULL;
3727 var = gimple_assign_rhs1 (last_stmt);
3728 lhs = gimple_assign_lhs (last_stmt);
3730 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3731 return NULL;
3733 hash_set<gimple *> bool_stmts;
3735 rhs_code = gimple_assign_rhs_code (last_stmt);
3736 if (CONVERT_EXPR_CODE_P (rhs_code))
3738 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3739 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3740 return NULL;
3741 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3742 if (vectype == NULL_TREE)
3743 return NULL;
3745 if (check_bool_pattern (var, vinfo, bool_stmts))
3747 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3748 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3749 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3750 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3751 else
3752 pattern_stmt
3753 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3755 else
3757 tree type = search_type_for_mask (var, vinfo);
3758 tree cst0, cst1, tmp;
3760 if (!type)
3761 return NULL;
3763          /* We may use the cond directly with a narrowed type to avoid
3764             multiple cond exprs and the subsequent result packing, and
3765             instead perform a single cond with a packed mask.  In the
3766             widening case it is better to do the cond first and then
3767             extract the results.  */
3768 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3769 type = TREE_TYPE (lhs);
3771 cst0 = build_int_cst (type, 0);
3772 cst1 = build_int_cst (type, 1);
3773 tmp = vect_recog_temp_ssa_var (type, NULL);
3774 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3776 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3778 tree new_vectype = get_vectype_for_scalar_type (type);
3779 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3780 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3781 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3782 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3784 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3785 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3789 *type_out = vectype;
3790 *type_in = vectype;
3791 stmts->safe_push (last_stmt);
3792 if (dump_enabled_p ())
3793 dump_printf_loc (MSG_NOTE, vect_location,
3794 "vect_recog_bool_pattern: detected:\n");
3796 return pattern_stmt;
3798 else if (rhs_code == COND_EXPR
3799 && TREE_CODE (var) == SSA_NAME)
3801 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3802 if (vectype == NULL_TREE)
3803 return NULL;
3805 /* Build a scalar type for the boolean result that when
3806 vectorized matches the vector type of the result in
3807 size and number of elements. */
3808 unsigned prec
3809 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3810 TYPE_VECTOR_SUBPARTS (vectype));
3812 tree type
3813 = build_nonstandard_integer_type (prec,
3814 TYPE_UNSIGNED (TREE_TYPE (var)));
3815 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3816 return NULL;
3818 if (!check_bool_pattern (var, vinfo, bool_stmts))
3819 return NULL;
3821 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3823 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3824 pattern_stmt
3825 = gimple_build_assign (lhs, COND_EXPR,
3826 build2 (NE_EXPR, boolean_type_node,
3827 rhs, build_int_cst (type, 0)),
3828 gimple_assign_rhs2 (last_stmt),
3829 gimple_assign_rhs3 (last_stmt));
3830 *type_out = vectype;
3831 *type_in = vectype;
3832 stmts->safe_push (last_stmt);
3833 if (dump_enabled_p ())
3834 dump_printf_loc (MSG_NOTE, vect_location,
3835 "vect_recog_bool_pattern: detected:\n");
3837 return pattern_stmt;
3839 else if (rhs_code == SSA_NAME
3840 && STMT_VINFO_DATA_REF (stmt_vinfo))
3842 stmt_vec_info pattern_stmt_info;
3843 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3844 gcc_assert (vectype != NULL_TREE);
3845 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3846 return NULL;
3848 if (check_bool_pattern (var, vinfo, bool_stmts))
3849 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3850 else
3852 tree type = search_type_for_mask (var, vinfo);
3853 tree cst0, cst1, new_vectype;
3855 if (!type)
3856 return NULL;
3858 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3859 type = TREE_TYPE (vectype);
3861 cst0 = build_int_cst (type, 0);
3862 cst1 = build_int_cst (type, 1);
3863 new_vectype = get_vectype_for_scalar_type (type);
3865 rhs = vect_recog_temp_ssa_var (type, NULL);
3866 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3868 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3869 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3870 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3871 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3874 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3875 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3877 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3878 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3879 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3880 rhs = rhs2;
3882 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3883 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3884 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3885 STMT_VINFO_DATA_REF (pattern_stmt_info)
3886 = STMT_VINFO_DATA_REF (stmt_vinfo);
3887 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3888 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3889 *type_out = vectype;
3890 *type_in = vectype;
3891 stmts->safe_push (last_stmt);
3892 if (dump_enabled_p ())
3893 dump_printf_loc (MSG_NOTE, vect_location,
3894 "vect_recog_bool_pattern: detected:\n");
3895 return pattern_stmt;
3897 else
3898 return NULL;
3902 /* A helper for vect_recog_mask_conversion_pattern. Build
3903    a conversion of MASK to a type suitable for masking VECTYPE.
3904    The built statement gets the required vectype and is appended to
3905    the pattern sequence of STMT_VINFO.
3907    Return the converted mask. */
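/* For example (an illustrative sketch, not from the original source): if
   VECTYPE is a vector of four 32-bit ints and MASK currently has a mask
   type with narrower boolean elements, this helper would emit a single
   conversion statement of roughly the form

     tmp_1 = (<signed-boolean:32>) mask_2;

   give it the four-element boolean vector type matching VECTYPE, queue it
   on STMT_VINFO's pattern definition sequence and return tmp_1.  The SSA
   names shown are hypothetical.  */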
3909 static tree
3910 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3911 vec_info *vinfo)
3913 gimple *stmt;
3914 tree masktype, tmp;
3915 stmt_vec_info new_stmt_info;
3917 masktype = build_same_sized_truth_vector_type (vectype);
3918 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3919 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3920 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3921 set_vinfo_for_stmt (stmt, new_stmt_info);
3922 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3923 append_pattern_def_seq (stmt_vinfo, stmt);
3925 return tmp;
3929 /* Function vect_recog_mask_conversion_pattern
3931 Try to find statements which require boolean type
3932    conversion.  Additional conversion statements are
3933 added to handle such cases. For example:
3935 bool m_1, m_2, m_3;
3936 int i_4, i_5;
3937 double d_6, d_7;
3938 char c_1, c_2, c_3;
3940 S1 m_1 = i_4 > i_5;
3941 S2 m_2 = d_6 < d_7;
3942 S3 m_3 = m_1 & m_2;
3943 S4 c_1 = m_3 ? c_2 : c_3;
3945 Will be transformed into:
3947 S1 m_1 = i_4 > i_5;
3948 S2 m_2 = d_6 < d_7;
3949 S3'' m_2' = (_Bool[bitsize=32])m_2
3950 S3' m_3' = m_1 & m_2';
3951 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3952 S4' c_1' = m_3'' ? c_2 : c_3; */
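/* Illustrative sketch (not part of the original source): C source of the
   form shown above, where the int comparison and the double comparison
   produce masks with different element widths, so one mask has to be
   converted before the '&' and the COND_EXPR can be vectorized.  All
   names are hypothetical.  */
#if 0
void
example_mask_conversion (char *c1, char *c2, char *c3, int *i4, int *i5,
                         double *d6, double *d7, int n)
{
  for (int x = 0; x < n; x++)
    c1[x] = ((i4[x] > i5[x]) & (d6[x] < d7[x])) ? c2[x] : c3[x];
}
#endif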
3954 static gimple *
3955 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3956 tree *type_out)
3958 gimple *last_stmt = stmts->pop ();
3959 enum tree_code rhs_code;
3960 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3961 tree vectype1, vectype2;
3962 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3963 stmt_vec_info pattern_stmt_info;
3964 vec_info *vinfo = stmt_vinfo->vinfo;
3966   /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3967 if (is_gimple_call (last_stmt)
3968 && gimple_call_internal_p (last_stmt)
3969 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3970 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3972 gcall *pattern_stmt;
3973 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3975 if (load)
3977 lhs = gimple_call_lhs (last_stmt);
3978 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3980 else
3982 rhs2 = gimple_call_arg (last_stmt, 3);
3983 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3986 rhs1 = gimple_call_arg (last_stmt, 2);
3987 rhs1_type = search_type_for_mask (rhs1, vinfo);
3988 if (!rhs1_type)
3989 return NULL;
3990 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3992 if (!vectype1 || !vectype2
3993 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3994 TYPE_VECTOR_SUBPARTS (vectype2)))
3995 return NULL;
3997 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3999 if (load)
4001 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4002 pattern_stmt
4003 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
4004 gimple_call_arg (last_stmt, 0),
4005 gimple_call_arg (last_stmt, 1),
4006 tmp);
4007 gimple_call_set_lhs (pattern_stmt, lhs);
4009 else
4010 pattern_stmt
4011 = gimple_build_call_internal (IFN_MASK_STORE, 4,
4012 gimple_call_arg (last_stmt, 0),
4013 gimple_call_arg (last_stmt, 1),
4014 tmp,
4015 gimple_call_arg (last_stmt, 3));
4017 gimple_call_set_nothrow (pattern_stmt, true);
4019 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4020 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4021 STMT_VINFO_DATA_REF (pattern_stmt_info)
4022 = STMT_VINFO_DATA_REF (stmt_vinfo);
4023 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4024 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4026 *type_out = vectype1;
4027 *type_in = vectype1;
4028 stmts->safe_push (last_stmt);
4029 if (dump_enabled_p ())
4030 dump_printf_loc (MSG_NOTE, vect_location,
4031 "vect_recog_mask_conversion_pattern: detected:\n");
4033 return pattern_stmt;
4036 if (!is_gimple_assign (last_stmt))
4037 return NULL;
4039 gimple *pattern_stmt;
4040 lhs = gimple_assign_lhs (last_stmt);
4041 rhs1 = gimple_assign_rhs1 (last_stmt);
4042 rhs_code = gimple_assign_rhs_code (last_stmt);
4044 /* Check for cond expression requiring mask conversion. */
4045 if (rhs_code == COND_EXPR)
4047 /* vect_recog_mixed_size_cond_pattern could apply.
4048 Do nothing then. */
4049 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4050 return NULL;
4052 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4054 if (TREE_CODE (rhs1) == SSA_NAME)
4056 rhs1_type = search_type_for_mask (rhs1, vinfo);
4057 if (!rhs1_type)
4058 return NULL;
4060 else if (COMPARISON_CLASS_P (rhs1))
4062 /* Check whether we're comparing scalar booleans and (if so)
4063 whether a better mask type exists than the mask associated
4064 with boolean-sized elements. This avoids unnecessary packs
4065 and unpacks if the booleans are set from comparisons of
4066 wider types. E.g. in:
4068              int x1, x2, x3, x4, y1, y2;
4070 bool b1 = (x1 == x2);
4071 bool b2 = (x3 == x4);
4072 ... = b1 == b2 ? y1 : y2;
4074 it is better for b1 and b2 to use the mask type associated
4075             with int elements rather than bool (byte) elements.  */
4076 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4077 if (!rhs1_type)
4078 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4080 else
4081 return NULL;
4083 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4085 if (!vectype1 || !vectype2)
4086 return NULL;
4088 /* Continue if a conversion is needed. Also continue if we have
4089 a comparison whose vector type would normally be different from
4090 VECTYPE2 when considered in isolation. In that case we'll
4091 replace the comparison with an SSA name (so that we can record
4092 its vector type) and behave as though the comparison was an SSA
4093 name from the outset. */
4094 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4095 TYPE_VECTOR_SUBPARTS (vectype2))
4096 && (TREE_CODE (rhs1) == SSA_NAME
4097 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4098 return NULL;
4100       /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4101          in place; we can handle it in vectorizable_condition.  This avoids
4102 unnecessary promotion stmts and increased vectorization factor. */
4103 if (COMPARISON_CLASS_P (rhs1)
4104 && INTEGRAL_TYPE_P (rhs1_type)
4105 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4106 TYPE_VECTOR_SUBPARTS (vectype2)))
4108 gimple *dummy;
4109 enum vect_def_type dt;
4110 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4111 &dummy, &dt)
4112 && dt == vect_external_def
4113 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4114 &dummy, &dt)
4115 && (dt == vect_external_def
4116 || dt == vect_constant_def))
4118 tree wide_scalar_type = build_nonstandard_integer_type
4119 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4120 TYPE_UNSIGNED (rhs1_type));
4121 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4122 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4123 return NULL;
4127 /* If rhs1 is a comparison we need to move it into a
4128 separate statement. */
4129 if (TREE_CODE (rhs1) != SSA_NAME)
4131 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4132 pattern_stmt = gimple_build_assign (tmp, rhs1);
4133 rhs1 = tmp;
4135 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4136 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4137 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4138 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4141 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4142 TYPE_VECTOR_SUBPARTS (vectype2)))
4143 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4144 else
4145 tmp = rhs1;
4147 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4148 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4149 gimple_assign_rhs2 (last_stmt),
4150 gimple_assign_rhs3 (last_stmt));
4152 *type_out = vectype1;
4153 *type_in = vectype1;
4154 stmts->safe_push (last_stmt);
4155 if (dump_enabled_p ())
4156 dump_printf_loc (MSG_NOTE, vect_location,
4157 "vect_recog_mask_conversion_pattern: detected:\n");
4159 return pattern_stmt;
4162 /* Now check for binary boolean operations requiring conversion for
4163      one of the operands.  */
4164 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4165 return NULL;
4167 if (rhs_code != BIT_IOR_EXPR
4168 && rhs_code != BIT_XOR_EXPR
4169 && rhs_code != BIT_AND_EXPR
4170 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4171 return NULL;
4173 rhs2 = gimple_assign_rhs2 (last_stmt);
4175 rhs1_type = search_type_for_mask (rhs1, vinfo);
4176 rhs2_type = search_type_for_mask (rhs2, vinfo);
4178 if (!rhs1_type || !rhs2_type
4179 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4180 return NULL;
4182 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4184 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4185 if (!vectype1)
4186 return NULL;
4187 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4189 else
4191 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4192 if (!vectype1)
4193 return NULL;
4194 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4197 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4198 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4200 *type_out = vectype1;
4201 *type_in = vectype1;
4202 stmts->safe_push (last_stmt);
4203 if (dump_enabled_p ())
4204 dump_printf_loc (MSG_NOTE, vect_location,
4205 "vect_recog_mask_conversion_pattern: detected:\n");
4207 return pattern_stmt;
4210 /* STMT is a load or store. If the load or store is conditional, return
4211 the boolean condition under which it occurs, otherwise return null. */
4213 static tree
4214 vect_get_load_store_mask (gimple *stmt)
4216 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4218 gcc_assert (gimple_assign_single_p (def_assign));
4219 return NULL_TREE;
4222 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4224 internal_fn ifn = gimple_call_internal_fn (def_call);
4225 int mask_index = internal_fn_mask_index (ifn);
4226 return gimple_call_arg (def_call, mask_index);
4229 gcc_unreachable ();
4232 /* Return the scalar offset type that an internal gather/scatter function
4233 should use. GS_INFO describes the gather/scatter operation. */
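/* For instance (an illustrative example, not from the original source):
   gathering 64-bit elements with a 32-bit unsigned offset yields a 64-bit
   unsigned offset type, so the offset will be zero-extended to the element
   width; an offset that already matches the element width is returned
   unchanged.  */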
4235 static tree
4236 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4238 tree offset_type = TREE_TYPE (gs_info->offset);
4239 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4241 /* Enforced by vect_check_gather_scatter. */
4242 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4243 gcc_assert (element_bits >= offset_bits);
4245 /* If the offset is narrower than the elements, extend it according
4246 to its sign. */
4247 if (element_bits > offset_bits)
4248 return build_nonstandard_integer_type (element_bits,
4249 TYPE_UNSIGNED (offset_type));
4251 return offset_type;
4254 /* Return MASK if MASK is suitable for masking an operation on vectors
4255 of type VECTYPE, otherwise convert it into such a form and return
4256 the result. Associate any conversion statements with STMT_INFO's
4257 pattern. */
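/* For example (an illustrative note, not from the original source): a mask
   computed from comparisons of 32-bit ints that is used to mask an
   operation on a vector of 64-bit doubles would have a different number of
   elements per vector and so gets converted here, whereas a mask whose
   vector type already has the right number of elements is returned
   unchanged.  */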
4259 static tree
4260 vect_convert_mask_for_vectype (tree mask, tree vectype,
4261 stmt_vec_info stmt_info, vec_info *vinfo)
4263 tree mask_type = search_type_for_mask (mask, vinfo);
4264 if (mask_type)
4266 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4267 if (mask_vectype
4268 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4269 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4270 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4272 return mask;
4275 /* Return the equivalent of:
4277 fold_convert (TYPE, VALUE)
4279 with the expectation that the operation will be vectorized.
4280 If new statements are needed, add them as pattern statements
4281 to STMT_INFO. */
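/* For example (an illustrative sketch, not from the original source):
   converting a 32-bit unsigned offset VALUE to a 64-bit TYPE would add a
   pattern statement of roughly the form

     off_1 = (unsigned long) value_2;

   to STMT_INFO's pattern definition sequence and return off_1, whereas a
   VALUE that already has TYPE is returned unchanged.  The SSA names are
   hypothetical.  */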
4283 static tree
4284 vect_add_conversion_to_patterm (tree type, tree value,
4285 stmt_vec_info stmt_info,
4286 vec_info *vinfo)
4288 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4289 return value;
4291 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4292 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4293 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4294 set_vinfo_for_stmt (conversion, new_stmt_info);
4295 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4296 append_pattern_def_seq (stmt_info, conversion);
4297 return new_value;
4300 /* Try to convert STMT into a call to a gather load or scatter store
4301 internal function. Return the final statement on success and set
4302 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4304 This function only handles gathers and scatters that were recognized
4305 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
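/* Illustrative sketch (not part of the original source): a loop whose load
   would typically be treated as a gather, assuming a target that provides
   gather-load internal functions.  All names are hypothetical.  */
#if 0
void
example_gather (double *restrict out, const double *restrict base,
                const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = base[idx[i]];   /* Indexed load: a gather candidate.  */
}
#endif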
4307 static gimple *
4308 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4309 tree *type_in, tree *type_out)
4311 /* Currently we only support this for loop vectorization. */
4312 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4313 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4314 if (!loop_vinfo)
4315 return NULL;
4317 /* Make sure that we're looking at a gather load or scatter store. */
4318 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4319 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4320 return NULL;
4322 /* Get the boolean that controls whether the load or store happens.
4323 This is null if the operation is unconditional. */
4324 tree mask = vect_get_load_store_mask (stmt);
4326 /* Make sure that the target supports an appropriate internal
4327 function for the gather/scatter operation. */
4328 gather_scatter_info gs_info;
4329 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4330 || gs_info.decl)
4331 return NULL;
4333 /* Convert the mask to the right form. */
4334 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4335 if (mask)
4336 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4337 loop_vinfo);
4339 /* Get the invariant base and non-invariant offset, converting the
4340 latter to the same width as the vector elements. */
4341 tree base = gs_info.base;
4342 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4343 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4344 last_stmt_info, loop_vinfo);
4346 /* Build the new pattern statement. */
4347 tree scale = size_int (gs_info.scale);
4348 gcall *pattern_stmt;
4349 if (DR_IS_READ (dr))
4351 if (mask != NULL)
4352 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4353 offset, scale, mask);
4354 else
4355 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4356 offset, scale);
4357 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4358 gimple_call_set_lhs (pattern_stmt, load_lhs);
4360 else
4362 tree rhs = vect_get_store_rhs (stmt);
4363 if (mask != NULL)
4364 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4365 base, offset, scale, rhs,
4366 mask);
4367 else
4368 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4369 base, offset, scale, rhs);
4371 gimple_call_set_nothrow (pattern_stmt, true);
4373 /* Copy across relevant vectorization info and associate DR with the
4374 new pattern statement instead of the original statement. */
4375 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4376 loop_vinfo);
4377 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4378 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4379 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4380 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4381 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4382 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4384 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4385 *type_out = vectype;
4386 *type_in = vectype;
4388 if (dump_enabled_p ())
4389 dump_printf_loc (MSG_NOTE, vect_location,
4390 "gather/scatter pattern detected:\n");
4392 return pattern_stmt;
4395 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4397 static gimple *
4398 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4399 tree *type_out)
4401 gimple *last_stmt = stmts->pop ();
4402 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4403 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4404 last_stmt_info,
4405 type_in, type_out);
4406 if (pattern_stmt)
4407 stmts->safe_push (last_stmt);
4408 return pattern_stmt;
4411 /* Mark statements that are involved in a pattern. */
4413 static inline void
4414 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4415 tree pattern_vectype)
4417 stmt_vec_info pattern_stmt_info, def_stmt_info;
4418 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4419 vec_info *vinfo = orig_stmt_info->vinfo;
4420 gimple *def_stmt;
4422 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4423 if (pattern_stmt_info == NULL)
4425 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4426 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4428 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4430 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4431 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4432 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4433 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4434 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4435 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4436 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4437 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4438 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4440 gimple_stmt_iterator si;
4441 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4442 !gsi_end_p (si); gsi_next (&si))
4444 def_stmt = gsi_stmt (si);
4445 def_stmt_info = vinfo_for_stmt (def_stmt);
4446 if (def_stmt_info == NULL)
4448 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4449 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4451 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4452 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4453 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4454 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4455 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4460 /* Function vect_pattern_recog_1
4462 Input:
4463 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4464 computation pattern.
4465 STMT: A stmt from which the pattern search should start.
4467 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4468 expression that computes the same functionality and can be used to
4469 replace the sequence of stmts that are involved in the pattern.
4471 Output:
4472 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4473 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4474 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4475 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4476 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4477    the available target pattern.
4479 This function also does some bookkeeping, as explained in the documentation
4480    for vect_pattern_recog.  */
4482 static bool
4483 vect_pattern_recog_1 (vect_recog_func *recog_func,
4484 gimple_stmt_iterator si,
4485 vec<gimple *> *stmts_to_replace)
4487 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4488 stmt_vec_info stmt_info;
4489 loop_vec_info loop_vinfo;
4490 tree pattern_vectype;
4491 tree type_in, type_out;
4492 enum tree_code code;
4493 int i;
4495 stmts_to_replace->truncate (0);
4496 stmts_to_replace->quick_push (stmt);
4497 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4498 if (!pattern_stmt)
4499 return false;
4501 stmt = stmts_to_replace->last ();
4502 stmt_info = vinfo_for_stmt (stmt);
4503 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4505 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4506 || VECTOR_TYPE_P (type_in))
4508 /* No need to check target support (already checked by the pattern
4509 recognition function). */
4510 pattern_vectype = type_out ? type_out : type_in;
4512 else
4514 /* Check target support */
4515 type_in = get_vectype_for_scalar_type (type_in);
4516 if (!type_in)
4517 return false;
4518 if (type_out)
4519 type_out = get_vectype_for_scalar_type (type_out);
4520 else
4521 type_out = type_in;
4522 if (!type_out)
4523 return false;
4524 pattern_vectype = type_out;
4526 if (is_gimple_assign (pattern_stmt))
4528 enum insn_code icode;
4529 code = gimple_assign_rhs_code (pattern_stmt);
4530 optab optab = optab_for_tree_code (code, type_in, optab_default);
4531 machine_mode vec_mode = TYPE_MODE (type_in);
4532 if (!optab
4533 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4534 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4535 return false;
4537 else
4538 gcc_assert (is_gimple_call (pattern_stmt));
4541 /* Found a vectorizable pattern. */
4542 if (dump_enabled_p ())
4544 dump_printf_loc (MSG_NOTE, vect_location,
4545 "%s pattern recognized: ", recog_func->name);
4546 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4549 /* Mark the stmts that are involved in the pattern. */
4550 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4552 /* Patterns cannot be vectorized using SLP, because they change the order of
4553 computation. */
4554 if (loop_vinfo)
4556 unsigned ix, ix2;
4557 gimple **elem_ptr;
4558 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4559 elem_ptr, *elem_ptr == stmt);
4562 /* It is possible that additional pattern stmts are created and inserted in
4563 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4564 relevant statements. */
4565 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4566 && (unsigned) i < (stmts_to_replace->length () - 1);
4567 i++)
4569 stmt_info = vinfo_for_stmt (stmt);
4570 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4571 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_NOTE, vect_location,
4574 "additional pattern stmt: ");
4575 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4578 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4581 return true;
4585 /* Function vect_pattern_recog
4587 Input:
4588 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4589 computation idioms.
4591 Output - for each computation idiom that is detected we create a new stmt
4592 that provides the same functionality and that can be vectorized. We
4593 also record some information in the struct_stmt_info of the relevant
4594 stmts, as explained below:
4596 At the entry to this function we have the following stmts, with the
4597 following initial value in the STMT_VINFO fields:
4599 stmt in_pattern_p related_stmt vec_stmt
4600 S1: a_i = .... - - -
4601 S2: a_2 = ..use(a_i).. - - -
4602 S3: a_1 = ..use(a_2).. - - -
4603 S4: a_0 = ..use(a_1).. - - -
4604 S5: ... = ..use(a_0).. - - -
4606 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4607 represented by a single stmt. We then:
4608 - create a new stmt S6 equivalent to the pattern (the stmt is not
4609 inserted into the code)
4610 - fill in the STMT_VINFO fields as follows:
4612 in_pattern_p related_stmt vec_stmt
4613 S1: a_i = .... - - -
4614 S2: a_2 = ..use(a_i).. - - -
4615 S3: a_1 = ..use(a_2).. - - -
4616 S4: a_0 = ..use(a_1).. true S6 -
4617 '---> S6: a_new = .... - S4 -
4618 S5: ... = ..use(a_0).. - - -
4620 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4621 to each other through the RELATED_STMT field).
4623 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4624 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4625 remain irrelevant unless used by stmts other than S4.
4627 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4628 (because they are marked as irrelevant). It will vectorize S6, and record
4629 a pointer to the new vector stmt VS6 from S6 (as usual).
4630 S4 will be skipped, and S5 will be vectorized as usual:
4632 in_pattern_p related_stmt vec_stmt
4633 S1: a_i = .... - - -
4634 S2: a_2 = ..use(a_i).. - - -
4635 S3: a_1 = ..use(a_2).. - - -
4636 > VS6: va_new = .... - - -
4637 S4: a_0 = ..use(a_1).. true S6 VS6
4638 '---> S6: a_new = .... - S4 VS6
4639 > VS5: ... = ..vuse(va_new).. - - -
4640 S5: ... = ..use(a_0).. - - -
4642 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4643 elsewhere), and we'll end up with:
4645 VS6: va_new = ....
4646 VS5: ... = ..vuse(va_new)..
4648    In the case of more than one pattern statement, e.g., widen-mult with an
4649    intermediate type:
4651 S1 a_t = ;
4652 S2 a_T = (TYPE) a_t;
4653 '--> S3: a_it = (interm_type) a_t;
4654 S4 prod_T = a_T * CONST;
4655 '--> S5: prod_T' = a_it w* CONST;
4657 there may be other users of a_T outside the pattern. In that case S2 will
4658 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4659 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4660 be recorded in S3. */
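/* Illustrative sketch (not part of the original source): C source that can
   give rise to the widen-mult-with-intermediate-type sequence S1/S2/S4
   sketched above, assuming 16-bit short, 32-bit int and 64-bit long long.
   All names are hypothetical.  */
#if 0
void
example_widen_mult (long long *restrict out, const short *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    {
      long long a_T = (long long) in[i];   /* S2: widening cast of a_t.  */
      out[i] = a_T * 1234;                 /* S4: prod_T = a_T * CONST.  */
    }
}
#endif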
4662 void
4663 vect_pattern_recog (vec_info *vinfo)
4665 struct loop *loop;
4666 basic_block *bbs;
4667 unsigned int nbbs;
4668 gimple_stmt_iterator si;
4669 unsigned int i, j;
4670 auto_vec<gimple *, 1> stmts_to_replace;
4671 gimple *stmt;
4673 DUMP_VECT_SCOPE ("vect_pattern_recog");
4675 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4677 loop = LOOP_VINFO_LOOP (loop_vinfo);
4678 bbs = LOOP_VINFO_BBS (loop_vinfo);
4679 nbbs = loop->num_nodes;
4681 /* Scan through the loop stmts, applying the pattern recognition
4682 functions starting at each stmt visited: */
4683 for (i = 0; i < nbbs; i++)
4685 basic_block bb = bbs[i];
4686 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4688 /* Scan over all generic vect_recog_xxx_pattern functions. */
4689 for (j = 0; j < NUM_PATTERNS; j++)
4690 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4691 &stmts_to_replace))
4692 break;
4696 else
4698 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4699 for (si = bb_vinfo->region_begin;
4700 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4702 if ((stmt = gsi_stmt (si))
4703 && vinfo_for_stmt (stmt)
4704 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4705 continue;
4707 /* Scan over all generic vect_recog_xxx_pattern functions. */
4708 for (j = 0; j < NUM_PATTERNS; j++)
4709 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4710 &stmts_to_replace))
4711 break;