[NDS32] Add intrinsic functions for TLB operation and data prefetch.
[official-gcc.git] / gcc / tree-vect-patterns.c
blob 621ed07758ff587d3fe6fca0c4ee1a4579a7b6fe
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
49 /* Pattern recognition functions */
50 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
51 tree *);
52 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
53 tree *);
54 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
55 tree *);
56 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
57 tree *);
58 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
59 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
60 tree *);
61 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
62 tree *, tree *);
63 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
64 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
65 tree *, tree *);
66 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
67 tree *, tree *);
69 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
70 tree *, tree *);
72 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
73 tree *, tree *);
74 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
75 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
76 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
77 tree *);
79 struct vect_recog_func
81 vect_recog_func_ptr fn;
82 const char *name;
85 /* Note that ordering matters - the first pattern matching on a stmt
86 is taken, which means usually the more complex one needs to precede
87 the less complex ones (widen_sum only after dot_prod or sad for example). */
88 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
89 { vect_recog_widen_mult_pattern, "widen_mult" },
90 { vect_recog_dot_prod_pattern, "dot_prod" },
91 { vect_recog_sad_pattern, "sad" },
92 { vect_recog_widen_sum_pattern, "widen_sum" },
93 { vect_recog_pow_pattern, "pow" },
94 { vect_recog_widen_shift_pattern, "widen_shift" },
95 { vect_recog_over_widening_pattern, "over_widening" },
96 { vect_recog_rotate_pattern, "rotate" },
97 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
98 { vect_recog_divmod_pattern, "divmod" },
99 { vect_recog_mult_pattern, "mult" },
100 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
101 { vect_recog_bool_pattern, "bool" },
102 /* This must come before mask conversion, and includes the parts
103 of mask conversion that are needed for gather and scatter
104 internal functions. */
105 { vect_recog_gather_scatter_pattern, "gather_scatter" },
106 { vect_recog_mask_conversion_pattern, "mask_conversion" }
109 static inline void
110 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
112 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
113 stmt);
116 static inline void
117 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
119 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
120 append_pattern_def_seq (stmt_info, stmt);
123 /* Check whether STMT2 is in the same loop or basic block as STMT1.
124 Which of the two applies depends on whether we're currently doing
125 loop-based or basic-block-based vectorization, as determined by
126 the vinfo_for_stmt for STMT1 (which must be defined).
128 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
129 to be defined as well. */
131 static bool
132 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
134 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
135 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
138 /* If the LHS of DEF_STMT has a single use, and that statement is
139 in the same loop or basic block, return it. */
141 static gimple *
142 vect_single_imm_use (gimple *def_stmt)
144 tree lhs = gimple_assign_lhs (def_stmt);
145 use_operand_p use_p;
146 gimple *use_stmt;
148 if (!single_imm_use (lhs, &use_p, &use_stmt))
149 return NULL;
151 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
152 return NULL;
154 return use_stmt;
157 /* Check whether NAME, an ssa-name used in USE_STMT,
158 is a result of a type promotion, such that:
159 DEF_STMT: NAME = NOP (name0)
160 If CHECK_SIGN is TRUE, check that either both types are signed or both are
161 unsigned. */
163 static bool
164 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
165 tree *orig_type, gimple **def_stmt, bool *promotion)
167 gimple *dummy_gimple;
168 stmt_vec_info stmt_vinfo;
169 tree type = TREE_TYPE (name);
170 tree oprnd0;
171 enum vect_def_type dt;
173 stmt_vinfo = vinfo_for_stmt (use_stmt);
174 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
175 return false;
177 if (dt != vect_internal_def
178 && dt != vect_external_def && dt != vect_constant_def)
179 return false;
181 if (!*def_stmt)
182 return false;
184 if (dt == vect_internal_def)
186 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
187 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
188 return false;
191 if (!is_gimple_assign (*def_stmt))
192 return false;
194 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
195 return false;
197 oprnd0 = gimple_assign_rhs1 (*def_stmt);
199 *orig_type = TREE_TYPE (oprnd0);
200 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
201 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
202 return false;
204 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
205 *promotion = true;
206 else
207 *promotion = false;
209 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
210 return false;
212 return true;
215 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
216 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
218 static tree
219 vect_recog_temp_ssa_var (tree type, gimple *stmt)
221 return make_temp_ssa_name (type, stmt, "patt");
224 /* Return true if STMT_VINFO describes a reduction for which reassociation
225 is allowed. If STMT_VINFO is part of a group, assume that it's part of
226 a reduction chain and optimistically assume that all statements
227 except the last allow reassociation. */
229 static bool
230 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
232 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
233 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
234 : GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
237 /* Function vect_recog_dot_prod_pattern
239 Try to find the following pattern:
241 type x_t, y_t;
242 TYPE1 prod;
243 TYPE2 sum = init;
244 loop:
245 sum_0 = phi <init, sum_1>
246 S1 x_t = ...
247 S2 y_t = ...
248 S3 x_T = (TYPE1) x_t;
249 S4 y_T = (TYPE1) y_t;
250 S5 prod = x_T * y_T;
251 [S6 prod = (TYPE2) prod; #optional]
252 S7 sum_1 = prod + sum_0;
254 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
255 the same size as 'TYPE1' or bigger. This is a special case of a reduction
256 computation.
258 Input:
260 * STMTS: Contains a stmt from which the pattern search begins. In the
261 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
262 will be detected.
264 Output:
266 * TYPE_IN: The type of the input arguments to the pattern.
268 * TYPE_OUT: The type of the output of this pattern.
270 * Return value: A new stmt that will be used to replace the sequence of
271 stmts that constitute the pattern. In this case it will be:
272 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
274 Note: The dot-prod idiom is a widening reduction pattern that is
275 vectorized without preserving all the intermediate results. It
276 produces only N/2 (widened) results (by summing up pairs of
277 intermediate results) rather than all N results. Therefore, we
278 cannot allow this pattern when we want to get all the results and in
279 the correct order (as is the case when this computation is in an
280 inner-loop nested in an outer-loop that is being vectorized). */
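/* As an illustration, a source loop of the kind the dot-product pattern is
   meant to match (an assumed example rather than a specific testcase, using
   signed short inputs and an int accumulator) could be:

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];

   Here x[i] and y[i] are promoted to int (S3/S4), multiplied in int (S5)
   and accumulated (S7), which is what DOT_PROD_EXPR expresses.  */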
282 static gimple *
283 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
284 tree *type_out)
286 gimple *stmt, *last_stmt = (*stmts)[0];
287 tree oprnd0, oprnd1;
288 tree oprnd00, oprnd01;
289 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
290 tree type, half_type;
291 gimple *pattern_stmt;
292 tree prod_type;
293 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
294 struct loop *loop;
295 tree var;
296 bool promotion;
298 if (!loop_info)
299 return NULL;
301 loop = LOOP_VINFO_LOOP (loop_info);
303 /* We don't allow changing the order of the computation in the inner-loop
304 when doing outer-loop vectorization. */
305 if (loop && nested_in_vect_loop_p (loop, last_stmt))
306 return NULL;
308 if (!is_gimple_assign (last_stmt))
309 return NULL;
311 type = gimple_expr_type (last_stmt);
313 /* Look for the following pattern
314 DX = (TYPE1) X;
315 DY = (TYPE1) Y;
316 DPROD = DX * DY;
317 DDPROD = (TYPE2) DPROD;
318 sum_1 = DDPROD + sum_0;
319 In which
320 - DX is double the size of X
321 - DY is double the size of Y
322 - DX, DY, DPROD all have the same type
323 - sum is the same size as DPROD or bigger
324 - sum has been recognized as a reduction variable.
326 This is equivalent to:
327 DPROD = X w* Y; #widen mult
328 sum_1 = DPROD w+ sum_0; #widen summation
330 DPROD = X w* Y; #widen mult
331 sum_1 = DPROD + sum_0; #summation
334 /* Starting from LAST_STMT, follow the defs of its uses in search
335 of the above pattern. */
337 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
338 return NULL;
340 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
342 /* Has been detected as widening-summation? */
344 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
345 type = gimple_expr_type (stmt);
346 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
347 return NULL;
348 oprnd0 = gimple_assign_rhs1 (stmt);
349 oprnd1 = gimple_assign_rhs2 (stmt);
350 half_type = TREE_TYPE (oprnd0);
352 else
354 gimple *def_stmt;
356 if (!vect_reassociating_reduction_p (stmt_vinfo))
357 return NULL;
358 oprnd0 = gimple_assign_rhs1 (last_stmt);
359 oprnd1 = gimple_assign_rhs2 (last_stmt);
360 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
361 || !types_compatible_p (TREE_TYPE (oprnd1), type))
362 return NULL;
363 stmt = last_stmt;
365 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
366 &promotion)
367 && promotion)
369 stmt = def_stmt;
370 oprnd0 = gimple_assign_rhs1 (stmt);
372 else
373 half_type = type;
376 /* So far so good. Since last_stmt was detected as a (summation) reduction,
377 we know that oprnd1 is the reduction variable (defined by a loop-header
378 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
379 Left to check that oprnd0 is defined by a (widen_)mult_expr */
380 if (TREE_CODE (oprnd0) != SSA_NAME)
381 return NULL;
383 prod_type = half_type;
384 stmt = SSA_NAME_DEF_STMT (oprnd0);
386 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
387 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
388 return NULL;
390 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
391 inside the loop (in case we are analyzing an outer-loop). */
392 if (!is_gimple_assign (stmt))
393 return NULL;
394 stmt_vinfo = vinfo_for_stmt (stmt);
395 gcc_assert (stmt_vinfo);
396 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
397 return NULL;
398 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
399 return NULL;
400 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
402 /* Has been detected as a widening multiplication? */
404 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
405 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
406 return NULL;
407 stmt_vinfo = vinfo_for_stmt (stmt);
408 gcc_assert (stmt_vinfo);
409 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
410 oprnd00 = gimple_assign_rhs1 (stmt);
411 oprnd01 = gimple_assign_rhs2 (stmt);
412 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
413 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
415 else
417 tree half_type0, half_type1;
418 gimple *def_stmt;
419 tree oprnd0, oprnd1;
421 oprnd0 = gimple_assign_rhs1 (stmt);
422 oprnd1 = gimple_assign_rhs2 (stmt);
423 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
424 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
425 return NULL;
426 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
427 &promotion)
428 || !promotion)
429 return NULL;
430 oprnd00 = gimple_assign_rhs1 (def_stmt);
431 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
432 &promotion)
433 || !promotion)
434 return NULL;
435 oprnd01 = gimple_assign_rhs1 (def_stmt);
436 if (!types_compatible_p (half_type0, half_type1))
437 return NULL;
438 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
439 return NULL;
442 half_type = TREE_TYPE (oprnd00);
443 *type_in = half_type;
444 *type_out = type;
446 /* Pattern detected. Create a stmt to be used to replace the pattern: */
447 var = vect_recog_temp_ssa_var (type, NULL);
448 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
449 oprnd00, oprnd01, oprnd1);
451 if (dump_enabled_p ())
453 dump_printf_loc (MSG_NOTE, vect_location,
454 "vect_recog_dot_prod_pattern: detected: ");
455 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
458 return pattern_stmt;
462 /* Function vect_recog_sad_pattern
464 Try to find the following Sum of Absolute Difference (SAD) pattern:
466 type x_t, y_t;
467 signed TYPE1 diff, abs_diff;
468 TYPE2 sum = init;
469 loop:
470 sum_0 = phi <init, sum_1>
471 S1 x_t = ...
472 S2 y_t = ...
473 S3 x_T = (TYPE1) x_t;
474 S4 y_T = (TYPE1) y_t;
475 S5 diff = x_T - y_T;
476 S6 abs_diff = ABS_EXPR <diff>;
477 [S7 abs_diff = (TYPE2) abs_diff; #optional]
478 S8 sum_1 = abs_diff + sum_0;
480 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
481 the same size as 'TYPE1' or bigger. This is a special case of a reduction
482 computation.
484 Input:
486 * STMTS: Contains a stmt from which the pattern search begins. In the
487 example, when this function is called with S8, the pattern
488 {S3,S4,S5,S6,S7,S8} will be detected.
490 Output:
492 * TYPE_IN: The type of the input arguments to the pattern.
494 * TYPE_OUT: The type of the output of this pattern.
496 * Return value: A new stmt that will be used to replace the sequence of
497 stmts that constitute the pattern. In this case it will be:
498 SAD_EXPR <x_t, y_t, sum_0>
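/* As an illustration, a source loop of the kind the SAD pattern is meant to
   match (an assumed example, using unsigned char inputs and an int
   accumulator) could be:

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += __builtin_abs (x[i] - y[i]);

   The subtraction is performed after promotion to int (S3-S5), the absolute
   value corresponds to S6, and the accumulation to S8.  */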
501 static gimple *
502 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
503 tree *type_out)
505 gimple *last_stmt = (*stmts)[0];
506 tree sad_oprnd0, sad_oprnd1;
507 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
508 tree half_type;
509 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
510 struct loop *loop;
511 bool promotion;
513 if (!loop_info)
514 return NULL;
516 loop = LOOP_VINFO_LOOP (loop_info);
518 /* We don't allow changing the order of the computation in the inner-loop
519 when doing outer-loop vectorization. */
520 if (loop && nested_in_vect_loop_p (loop, last_stmt))
521 return NULL;
523 if (!is_gimple_assign (last_stmt))
524 return NULL;
526 tree sum_type = gimple_expr_type (last_stmt);
528 /* Look for the following pattern
529 DX = (TYPE1) X;
530 DY = (TYPE1) Y;
531 DDIFF = DX - DY;
532 DAD = ABS_EXPR <DDIFF>;
533 DAD = (TYPE2) DAD;
534 sum_1 = DAD + sum_0;
535 In which
536 - DX is at least double the size of X
537 - DY is at least double the size of Y
538 - DX, DY, DDIFF, DAD all have the same type
539 - sum is the same size as DAD or bigger
540 - sum has been recognized as a reduction variable.
542 This is equivalent to:
543 DDIFF = X w- Y; #widen sub
544 DAD = ABS_EXPR <DDIFF>;
545 sum_1 = DAD w+ sum_0; #widen summation
547 DDIFF = X w- Y; #widen sub
548 DAD = ABS_EXPR <DDIFF>;
549 sum_1 = DAD + sum_0; #summation
552 /* Starting from LAST_STMT, follow the defs of its uses in search
553 of the above pattern. */
555 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
556 return NULL;
558 tree plus_oprnd0, plus_oprnd1;
560 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
562 /* Has been detected as widening-summation? */
564 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
565 sum_type = gimple_expr_type (stmt);
566 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
567 return NULL;
568 plus_oprnd0 = gimple_assign_rhs1 (stmt);
569 plus_oprnd1 = gimple_assign_rhs2 (stmt);
570 half_type = TREE_TYPE (plus_oprnd0);
572 else
574 gimple *def_stmt;
576 if (!vect_reassociating_reduction_p (stmt_vinfo))
577 return NULL;
578 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
579 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
580 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
581 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
582 return NULL;
584 /* The type conversion could be promotion, demotion,
585 or just signed -> unsigned. */
586 if (type_conversion_p (plus_oprnd0, last_stmt, false,
587 &half_type, &def_stmt, &promotion))
588 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
589 else
590 half_type = sum_type;
593 /* So far so good. Since last_stmt was detected as a (summation) reduction,
594 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
595 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
596 Then check that plus_oprnd0 is defined by an abs_expr. */
598 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
599 return NULL;
601 tree abs_type = half_type;
602 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
604 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
605 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
606 return NULL;
608 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
609 inside the loop (in case we are analyzing an outer-loop). */
610 if (!is_gimple_assign (abs_stmt))
611 return NULL;
613 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
614 gcc_assert (abs_stmt_vinfo);
615 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
616 return NULL;
617 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
618 return NULL;
620 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
621 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
622 return NULL;
623 if (TYPE_UNSIGNED (abs_type))
624 return NULL;
626 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
628 if (TREE_CODE (abs_oprnd) != SSA_NAME)
629 return NULL;
631 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
633 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
634 if (!gimple_bb (diff_stmt)
635 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
636 return NULL;
638 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
639 inside the loop (in case we are analyzing an outer-loop). */
640 if (!is_gimple_assign (diff_stmt))
641 return NULL;
643 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
644 gcc_assert (diff_stmt_vinfo);
645 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
646 return NULL;
647 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
648 return NULL;
650 tree half_type0, half_type1;
651 gimple *def_stmt;
653 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
654 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
656 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
657 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
658 return NULL;
659 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
660 &half_type0, &def_stmt, &promotion)
661 || !promotion)
662 return NULL;
663 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
665 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
666 &half_type1, &def_stmt, &promotion)
667 || !promotion)
668 return NULL;
669 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
671 if (!types_compatible_p (half_type0, half_type1))
672 return NULL;
673 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
674 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
675 return NULL;
677 *type_in = TREE_TYPE (sad_oprnd0);
678 *type_out = sum_type;
680 /* Pattern detected. Create a stmt to be used to replace the pattern: */
681 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
682 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
683 sad_oprnd1, plus_oprnd1);
685 if (dump_enabled_p ())
687 dump_printf_loc (MSG_NOTE, vect_location,
688 "vect_recog_sad_pattern: detected: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
692 return pattern_stmt;
696 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
697 and LSHIFT_EXPR.
699 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
700 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
702 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
703 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
704 that satisfies the above restrictions, we can perform a widening operation
705 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
706 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
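/* A sketch of the intermediate-type case described above, under the assumed
   choice of HALF_TYPE = unsigned char and TYPE = unsigned int:

     a_T = (unsigned int) a_t;
     prod_T = a_T * 1000;        the constant 1000 does not fit unsigned char,
                                 but does fit unsigned short

   is handled by creating

     a_it = (unsigned short) a_t;

   in *WSTMT, so that the widening operation can be performed from unsigned
   short to unsigned int instead.  */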
708 static bool
709 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
710 tree const_oprnd, tree *oprnd,
711 gimple **wstmt, tree type,
712 tree *half_type, gimple *def_stmt)
714 tree new_type, new_oprnd;
716 if (code != MULT_EXPR && code != LSHIFT_EXPR)
717 return false;
719 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
720 || (code == LSHIFT_EXPR
721 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
722 != 1))
723 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
725 /* CONST_OPRND is a constant of HALF_TYPE. */
726 *oprnd = gimple_assign_rhs1 (def_stmt);
727 return true;
730 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
731 return false;
733 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
734 return false;
736 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
737 a type 2 times bigger than HALF_TYPE. */
738 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
739 TYPE_UNSIGNED (type));
740 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
741 || (code == LSHIFT_EXPR
742 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
743 return false;
745 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
746 *oprnd = gimple_assign_rhs1 (def_stmt);
747 new_oprnd = make_ssa_name (new_type);
748 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
749 *oprnd = new_oprnd;
751 *half_type = new_type;
752 return true;
756 /* Function vect_recog_widen_mult_pattern
758 Try to find the following pattern:
760 type1 a_t;
761 type2 b_t;
762 TYPE a_T, b_T, prod_T;
764 S1 a_t = ;
765 S2 b_t = ;
766 S3 a_T = (TYPE) a_t;
767 S4 b_T = (TYPE) b_t;
768 S5 prod_T = a_T * b_T;
770 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
772 Also detect unsigned cases:
774 unsigned type1 a_t;
775 unsigned type2 b_t;
776 unsigned TYPE u_prod_T;
777 TYPE a_T, b_T, prod_T;
779 S1 a_t = ;
780 S2 b_t = ;
781 S3 a_T = (TYPE) a_t;
782 S4 b_T = (TYPE) b_t;
783 S5 prod_T = a_T * b_T;
784 S6 u_prod_T = (unsigned TYPE) prod_T;
786 and multiplication by constants:
788 type a_t;
789 TYPE a_T, prod_T;
791 S1 a_t = ;
792 S3 a_T = (TYPE) a_t;
793 S5 prod_T = a_T * CONST;
795 A special case of multiplication by constants is when 'TYPE' is 4 times
796 bigger than 'type', but CONST fits an intermediate type 2 times smaller
797 than 'TYPE'. In that case we create an additional pattern stmt for S3
798 to create a variable of the intermediate type, and perform widen-mult
799 on the intermediate type as well:
801 type a_t;
802 interm_type a_it;
803 TYPE a_T, prod_T, prod_T';
805 S1 a_t = ;
806 S3 a_T = (TYPE) a_t;
807 '--> a_it = (interm_type) a_t;
808 S5 prod_T = a_T * CONST;
809 '--> prod_T' = a_it w* CONST;
811 Input/Output:
813 * STMTS: Contains a stmt from which the pattern search begins. In the
814 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
815 is detected. In case of unsigned widen-mult, the original stmt (S5) is
816 replaced with S6 in STMTS. In case of multiplication by a constant
817 of an intermediate type (the last case above), STMTS also contains S3
818 (inserted before S5).
820 Output:
822 * TYPE_IN: The type of the input arguments to the pattern.
824 * TYPE_OUT: The type of the output of this pattern.
826 * Return value: A new stmt that will be used to replace the sequence of
827 stmts that constitute the pattern. In this case it will be:
828 WIDEN_MULT <a_t, b_t>
829 If the result of WIDEN_MULT needs to be converted to a larger type, the
830 returned stmt will be this type conversion stmt.
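/* As an illustration, a source loop of the kind the widen-mult pattern is
   meant to match (an assumed example, with short inputs and an int result)
   could be:

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   Both operands are promoted from short to int (S3/S4) and multiplied in
   int (S5), which maps to WIDEN_MULT <a_t, b_t>.  */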
833 static gimple *
834 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
835 tree *type_in, tree *type_out)
837 gimple *last_stmt = stmts->pop ();
838 gimple *def_stmt0, *def_stmt1;
839 tree oprnd0, oprnd1;
840 tree type, half_type0, half_type1;
841 gimple *new_stmt = NULL, *pattern_stmt = NULL;
842 tree vectype, vecitype;
843 tree var;
844 enum tree_code dummy_code;
845 int dummy_int;
846 vec<tree> dummy_vec;
847 bool op1_ok;
848 bool promotion;
850 if (!is_gimple_assign (last_stmt))
851 return NULL;
853 type = gimple_expr_type (last_stmt);
855 /* Starting from LAST_STMT, follow the defs of its uses in search
856 of the above pattern. */
858 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
859 return NULL;
861 oprnd0 = gimple_assign_rhs1 (last_stmt);
862 oprnd1 = gimple_assign_rhs2 (last_stmt);
863 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
864 || !types_compatible_p (TREE_TYPE (oprnd1), type))
865 return NULL;
867 /* Check argument 0. */
868 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
869 &promotion)
870 || !promotion)
871 return NULL;
872 /* Check argument 1. */
873 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
874 &def_stmt1, &promotion);
876 if (op1_ok && promotion)
878 oprnd0 = gimple_assign_rhs1 (def_stmt0);
879 oprnd1 = gimple_assign_rhs1 (def_stmt1);
881 else
883 if (TREE_CODE (oprnd1) == INTEGER_CST
884 && TREE_CODE (half_type0) == INTEGER_TYPE
885 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
886 &oprnd0, &new_stmt, type,
887 &half_type0, def_stmt0))
889 half_type1 = half_type0;
890 oprnd1 = fold_convert (half_type1, oprnd1);
892 else
893 return NULL;
896 /* If the two arguments have different sizes, convert the one with
897 the smaller type into the larger type. */
898 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
900 /* If we already used up the single-stmt slot give up. */
901 if (new_stmt)
902 return NULL;
904 tree* oprnd = NULL;
905 gimple *def_stmt = NULL;
907 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
909 def_stmt = def_stmt0;
910 half_type0 = half_type1;
911 oprnd = &oprnd0;
913 else
915 def_stmt = def_stmt1;
916 half_type1 = half_type0;
917 oprnd = &oprnd1;
920 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
921 tree new_oprnd = make_ssa_name (half_type0);
922 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
923 *oprnd = new_oprnd;
926 /* Handle unsigned case. Look for
927 S6 u_prod_T = (unsigned TYPE) prod_T;
928 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
929 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
931 gimple *use_stmt;
932 tree use_lhs;
933 tree use_type;
935 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
936 return NULL;
938 use_stmt = vect_single_imm_use (last_stmt);
939 if (!use_stmt || !is_gimple_assign (use_stmt)
940 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
941 return NULL;
943 use_lhs = gimple_assign_lhs (use_stmt);
944 use_type = TREE_TYPE (use_lhs);
945 if (!INTEGRAL_TYPE_P (use_type)
946 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
947 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
948 return NULL;
950 type = use_type;
951 last_stmt = use_stmt;
954 if (!types_compatible_p (half_type0, half_type1))
955 return NULL;
957 /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
958 to get an intermediate result of type ITYPE. In this case we need
959 to build a statement to convert this intermediate result to type TYPE. */
960 tree itype = type;
961 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
962 itype = build_nonstandard_integer_type
963 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
964 TYPE_UNSIGNED (type));
966 /* Pattern detected. */
967 if (dump_enabled_p ())
968 dump_printf_loc (MSG_NOTE, vect_location,
969 "vect_recog_widen_mult_pattern: detected:\n");
971 /* Check target support */
972 vectype = get_vectype_for_scalar_type (half_type0);
973 vecitype = get_vectype_for_scalar_type (itype);
974 if (!vectype
975 || !vecitype
976 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
977 vecitype, vectype,
978 &dummy_code, &dummy_code,
979 &dummy_int, &dummy_vec))
980 return NULL;
982 *type_in = vectype;
983 *type_out = get_vectype_for_scalar_type (type);
985 /* Pattern supported. Create a stmt to be used to replace the pattern: */
986 var = vect_recog_temp_ssa_var (itype, NULL);
987 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
989 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
990 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
992 /* If the original two operands have different sizes, we may need to convert
993 the smaller one into the larger type. If this is the case, at this point
994 the new stmt is already built. */
995 if (new_stmt)
997 append_pattern_def_seq (stmt_vinfo, new_stmt);
998 stmt_vec_info new_stmt_info
999 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
1000 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1001 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1004 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
1005 the result of the widen-mult operation into type TYPE. */
1006 if (itype != type)
1008 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1009 stmt_vec_info pattern_stmt_info
1010 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
1011 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1012 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1013 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1014 NOP_EXPR,
1015 gimple_assign_lhs (pattern_stmt));
1018 if (dump_enabled_p ())
1019 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1021 stmts->safe_push (last_stmt);
1022 return pattern_stmt;
1026 /* Function vect_recog_pow_pattern
1028 Try to find the following pattern:
1030 x = POW (y, N);
1032 with POW being one of pow, powf, powi, powif and N being
1033 either 2 or 0.5.
1035 Input:
1037 * LAST_STMT: A stmt from which the pattern search begins.
1039 Output:
1041 * TYPE_IN: The type of the input arguments to the pattern.
1043 * TYPE_OUT: The type of the output of this pattern.
1045 * Return value: A new stmt that will be used to replace the sequence of
1046 stmts that constitute the pattern. In this case it will be:
1047 x = x * x
1049 x = sqrt (x)
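/* As an illustration (an assumed example), a call such as

     y[i] = __builtin_pow (x[i], 2.0);

   would be replaced by y[i] = x[i] * x[i], while an exponent of 0.5 is
   replaced by an IFN_SQRT call when the target supports it.  */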
1052 static gimple *
1053 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1054 tree *type_out)
1056 gimple *last_stmt = (*stmts)[0];
1057 tree base, exp;
1058 gimple *stmt;
1059 tree var;
1061 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1062 return NULL;
1064 switch (gimple_call_combined_fn (last_stmt))
1066 CASE_CFN_POW:
1067 CASE_CFN_POWI:
1068 break;
1070 default:
1071 return NULL;
1074 base = gimple_call_arg (last_stmt, 0);
1075 exp = gimple_call_arg (last_stmt, 1);
1076 if (TREE_CODE (exp) != REAL_CST
1077 && TREE_CODE (exp) != INTEGER_CST)
1079 if (flag_unsafe_math_optimizations
1080 && TREE_CODE (base) == REAL_CST
1081 && !gimple_call_internal_p (last_stmt))
1083 combined_fn log_cfn;
1084 built_in_function exp_bfn;
1085 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1087 case BUILT_IN_POW:
1088 log_cfn = CFN_BUILT_IN_LOG;
1089 exp_bfn = BUILT_IN_EXP;
1090 break;
1091 case BUILT_IN_POWF:
1092 log_cfn = CFN_BUILT_IN_LOGF;
1093 exp_bfn = BUILT_IN_EXPF;
1094 break;
1095 case BUILT_IN_POWL:
1096 log_cfn = CFN_BUILT_IN_LOGL;
1097 exp_bfn = BUILT_IN_EXPL;
1098 break;
1099 default:
1100 return NULL;
1102 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1103 tree exp_decl = builtin_decl_implicit (exp_bfn);
1104 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1105 does that, but if C is a power of 2, we want to use
1106 exp2 (log2 (C) * x) in the non-vectorized version, but for
1107 vectorization we don't have vectorized exp2. */
1108 if (logc
1109 && TREE_CODE (logc) == REAL_CST
1110 && exp_decl
1111 && lookup_attribute ("omp declare simd",
1112 DECL_ATTRIBUTES (exp_decl)))
1114 cgraph_node *node = cgraph_node::get_create (exp_decl);
1115 if (node->simd_clones == NULL)
1117 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1118 || node->definition)
1119 return NULL;
1120 expand_simd_clones (node);
1121 if (node->simd_clones == NULL)
1122 return NULL;
1124 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1125 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1126 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1127 new_pattern_def_seq (stmt_vinfo, g);
1128 *type_in = TREE_TYPE (base);
1129 *type_out = NULL_TREE;
1130 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1131 g = gimple_build_call (exp_decl, 1, def);
1132 gimple_call_set_lhs (g, res);
1133 return g;
1137 return NULL;
1140 /* We now have a pow or powi builtin function call with a constant
1141 exponent. */
1143 *type_out = NULL_TREE;
1145 /* Catch squaring. */
1146 if ((tree_fits_shwi_p (exp)
1147 && tree_to_shwi (exp) == 2)
1148 || (TREE_CODE (exp) == REAL_CST
1149 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1151 *type_in = TREE_TYPE (base);
1153 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1154 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1155 return stmt;
1158 /* Catch square root. */
1159 if (TREE_CODE (exp) == REAL_CST
1160 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1162 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1163 if (*type_in
1164 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1165 OPTIMIZE_FOR_SPEED))
1167 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1168 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1169 gimple_call_set_lhs (stmt, var);
1170 gimple_call_set_nothrow (stmt, true);
1171 return stmt;
1175 return NULL;
1179 /* Function vect_recog_widen_sum_pattern
1181 Try to find the following pattern:
1183 type x_t;
1184 TYPE x_T, sum = init;
1185 loop:
1186 sum_0 = phi <init, sum_1>
1187 S1 x_t = *p;
1188 S2 x_T = (TYPE) x_t;
1189 S3 sum_1 = x_T + sum_0;
1191 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1192 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1193 a special case of a reduction computation.
1195 Input:
1197 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1198 when this function is called with S3, the pattern {S2,S3} will be detected.
1200 Output:
1202 * TYPE_IN: The type of the input arguments to the pattern.
1204 * TYPE_OUT: The type of the output of this pattern.
1206 * Return value: A new stmt that will be used to replace the sequence of
1207 stmts that constitute the pattern. In this case it will be:
1208 WIDEN_SUM <x_t, sum_0>
1210 Note: The widening-sum idiom is a widening reduction pattern that is
1211 vectorized without preserving all the intermediate results. It
1212 produces only N/2 (widened) results (by summing up pairs of
1213 intermediate results) rather than all N results. Therefore, we
1214 cannot allow this pattern when we want to get all the results and in
1215 the correct order (as is the case when this computation is in an
1216 inner-loop nested in an outer-loop that is being vectorized). */
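/* As an illustration, a source loop of the kind the widen-sum pattern is
   meant to match (an assumed example, summing unsigned short elements into
   an unsigned int accumulator) could be:

     unsigned short x[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   Each element is promoted from unsigned short to unsigned int (S2) before
   being added to the accumulator (S3), giving WIDEN_SUM <x_t, sum_0>.  */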
1218 static gimple *
1219 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1220 tree *type_out)
1222 gimple *stmt, *last_stmt = (*stmts)[0];
1223 tree oprnd0, oprnd1;
1224 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1225 tree type, half_type;
1226 gimple *pattern_stmt;
1227 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228 struct loop *loop;
1229 tree var;
1230 bool promotion;
1232 if (!loop_info)
1233 return NULL;
1235 loop = LOOP_VINFO_LOOP (loop_info);
1237 /* We don't allow changing the order of the computation in the inner-loop
1238 when doing outer-loop vectorization. */
1239 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1240 return NULL;
1242 if (!is_gimple_assign (last_stmt))
1243 return NULL;
1245 type = gimple_expr_type (last_stmt);
1247 /* Look for the following pattern
1248 DX = (TYPE) X;
1249 sum_1 = DX + sum_0;
1250 In which DX is at least double the size of X, and sum_1 has been
1251 recognized as a reduction variable.
1254 /* Starting from LAST_STMT, follow the defs of its uses in search
1255 of the above pattern. */
1257 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1258 return NULL;
1260 if (!vect_reassociating_reduction_p (stmt_vinfo))
1261 return NULL;
1263 oprnd0 = gimple_assign_rhs1 (last_stmt);
1264 oprnd1 = gimple_assign_rhs2 (last_stmt);
1265 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1266 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1267 return NULL;
1269 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1270 we know that oprnd1 is the reduction variable (defined by a loop-header
1271 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1272 Left to check that oprnd0 is defined by a cast from type 'type' to type
1273 'TYPE'. */
1275 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1276 &promotion)
1277 || !promotion)
1278 return NULL;
1280 oprnd0 = gimple_assign_rhs1 (stmt);
1281 *type_in = half_type;
1282 *type_out = type;
1284 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1285 var = vect_recog_temp_ssa_var (type, NULL);
1286 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1288 if (dump_enabled_p ())
1290 dump_printf_loc (MSG_NOTE, vect_location,
1291 "vect_recog_widen_sum_pattern: detected: ");
1292 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1295 return pattern_stmt;
1299 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1301 Input:
1302 STMT - a statement to check.
1303 DEF - we support operations with two operands, one of which is constant.
1304 The other operand can be defined by a demotion operation, or by a
1305 previous statement in a sequence of over-promoted operations. In the
1306 latter case DEF is used to replace that operand. (It is defined by a
1307 pattern statement we created for the previous statement in the
1308 sequence).
1310 Input/output:
1311 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1312 NULL, it's the type of DEF.
1313 STMTS - additional pattern statements. If a pattern statement (type
1314 conversion) is created in this function, its original statement is
1315 added to STMTS.
1317 Output:
1318 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1319 operands to use in the new pattern statement for STMT (will be created
1320 in vect_recog_over_widening_pattern ()).
1321 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1322 statements for STMT: the first one is a type promotion and the second
1323 one is the operation itself. We return the type promotion statement
1324 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1325 the second pattern statement. */
1327 static bool
1328 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1329 tree *op0, tree *op1, gimple **new_def_stmt,
1330 vec<gimple *> *stmts)
1332 enum tree_code code;
1333 tree const_oprnd, oprnd;
1334 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1335 gimple *def_stmt, *new_stmt;
1336 bool first = false;
1337 bool promotion;
1339 *op0 = NULL_TREE;
1340 *op1 = NULL_TREE;
1341 *new_def_stmt = NULL;
1343 if (!is_gimple_assign (stmt))
1344 return false;
1346 code = gimple_assign_rhs_code (stmt);
1347 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1348 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1349 return false;
1351 oprnd = gimple_assign_rhs1 (stmt);
1352 const_oprnd = gimple_assign_rhs2 (stmt);
1353 type = gimple_expr_type (stmt);
1355 if (TREE_CODE (oprnd) != SSA_NAME
1356 || TREE_CODE (const_oprnd) != INTEGER_CST)
1357 return false;
1359 /* If oprnd has other uses besides that in stmt we cannot mark it
1360 as being part of a pattern only. */
1361 if (!has_single_use (oprnd))
1362 return false;
1364 /* If we are in the middle of a sequence, we use DEF from a previous
1365 statement. Otherwise, OPRND has to be a result of type promotion. */
1366 if (*new_type)
1368 half_type = *new_type;
1369 oprnd = def;
1371 else
1373 first = true;
1374 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1375 &promotion)
1376 || !promotion
1377 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1378 return false;
1381 /* Can we perform the operation on a smaller type? */
1382 switch (code)
1384 case BIT_IOR_EXPR:
1385 case BIT_XOR_EXPR:
1386 case BIT_AND_EXPR:
1387 if (!int_fits_type_p (const_oprnd, half_type))
1389 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1390 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1391 return false;
1393 interm_type = build_nonstandard_integer_type (
1394 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1395 if (!int_fits_type_p (const_oprnd, interm_type))
1396 return false;
1399 break;
1401 case LSHIFT_EXPR:
1402 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1403 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1404 return false;
1406 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1407 (e.g., if the original value was char, the shift amount is at most 8
1408 if we want to use short). */
1409 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1410 return false;
1412 interm_type = build_nonstandard_integer_type (
1413 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1415 if (!vect_supportable_shift (code, interm_type))
1416 return false;
1418 break;
1420 case RSHIFT_EXPR:
1421 if (vect_supportable_shift (code, half_type))
1422 break;
1424 /* Try intermediate type - HALF_TYPE is not supported. */
1425 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1426 return false;
1428 interm_type = build_nonstandard_integer_type (
1429 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1431 if (!vect_supportable_shift (code, interm_type))
1432 return false;
1434 break;
1436 default:
1437 gcc_unreachable ();
1440 /* There are four possible cases:
1441 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1442 the first statement in the sequence)
1443 a. The original, HALF_TYPE, is not enough - we replace the promotion
1444 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1445 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1446 promotion.
1447 2. OPRND is defined by a pattern statement we created.
1448 a. Its type is not sufficient for the operation, we create a new stmt:
1449 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1450 this statement in NEW_DEF_STMT, and it is later put in
1451 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1452 b. OPRND is good to use in the new statement. */
1453 if (first)
1455 if (interm_type)
1457 /* Replace the original type conversion HALF_TYPE->TYPE with
1458 HALF_TYPE->INTERM_TYPE. */
1459 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1461 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1462 /* Check if the already created pattern stmt is what we need. */
1463 if (!is_gimple_assign (new_stmt)
1464 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1465 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1466 return false;
1468 stmts->safe_push (def_stmt);
1469 oprnd = gimple_assign_lhs (new_stmt);
1471 else
1473 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1474 oprnd = gimple_assign_rhs1 (def_stmt);
1475 new_oprnd = make_ssa_name (interm_type);
1476 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1477 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1478 stmts->safe_push (def_stmt);
1479 oprnd = new_oprnd;
1482 else
1484 /* Retrieve the operand before the type promotion. */
1485 oprnd = gimple_assign_rhs1 (def_stmt);
1488 else
1490 if (interm_type)
1492 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1493 new_oprnd = make_ssa_name (interm_type);
1494 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1495 oprnd = new_oprnd;
1496 *new_def_stmt = new_stmt;
1499 /* Otherwise, OPRND is already set. */
1502 if (interm_type)
1503 *new_type = interm_type;
1504 else
1505 *new_type = half_type;
1507 *op0 = oprnd;
1508 *op1 = fold_convert (*new_type, const_oprnd);
1510 return true;
1514 /* Try to find a statement or a sequence of statements that can be performed
1515 on a smaller type:
1517 type x_t;
1518 TYPE x_T, res0_T, res1_T;
1519 loop:
1520 S1 x_t = *p;
1521 S2 x_T = (TYPE) x_t;
1522 S3 res0_T = op (x_T, C0);
1523 S4 res1_T = op (res0_T, C1);
1524 S5 ... = () res1_T; - type demotion
1526 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1527 constants.
1528 Check if S3 and S4 can be done on a smaller type than 'TYPE', which can either
1529 be 'type' or some intermediate type. For now, we expect S5 to be a type
1530 demotion operation. We also check that S3 and S4 have only one use. */
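/* As an illustration, a sequence of the kind this over-widening pattern is
   meant to match (an assumed example on unsigned char data) could be:

     unsigned char a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = (a[i] >> 2) & 0x3f;

   The C integer promotions perform the shift and the bitwise AND in int,
   but the final store demotes the result back to unsigned char, so both
   operations can be carried out in a narrower type.  */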
1532 static gimple *
1533 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1534 tree *type_in, tree *type_out)
1536 gimple *stmt = stmts->pop ();
1537 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1538 *use_stmt = NULL;
1539 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1540 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1541 bool first;
1542 tree type = NULL;
1544 first = true;
1545 while (1)
1547 if (!vinfo_for_stmt (stmt)
1548 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1549 return NULL;
1551 new_def_stmt = NULL;
1552 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1553 &op0, &op1, &new_def_stmt,
1554 stmts))
1556 if (first)
1557 return NULL;
1558 else
1559 break;
1562 /* STMT can be performed on a smaller type. Check its uses. */
1563 use_stmt = vect_single_imm_use (stmt);
1564 if (!use_stmt || !is_gimple_assign (use_stmt))
1565 return NULL;
1567 /* Create pattern statement for STMT. */
1568 vectype = get_vectype_for_scalar_type (new_type);
1569 if (!vectype)
1570 return NULL;
1572 /* We want to collect all the statements for which we create pattern
1573 statements, except for the case when the last statement in the
1574 sequence doesn't have a corresponding pattern statement. In such
1575 case we associate the last pattern statement with the last statement
1576 in the sequence. Therefore, we only add the original statement to
1577 the list if we know that it is not the last. */
1578 if (prev_stmt)
1579 stmts->safe_push (prev_stmt);
1581 var = vect_recog_temp_ssa_var (new_type, NULL);
1582 pattern_stmt
1583 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1584 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1585 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1587 if (dump_enabled_p ())
1589 dump_printf_loc (MSG_NOTE, vect_location,
1590 "created pattern stmt: ");
1591 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1594 type = gimple_expr_type (stmt);
1595 prev_stmt = stmt;
1596 stmt = use_stmt;
1598 first = false;
1601 /* We got a sequence. We expect it to end with a type demotion operation.
1602 Otherwise, we quit (for now). There are three possible cases: the
1603 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1604 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1605 NEW_TYPE differs (we create a new conversion statement). */
1606 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1608 use_lhs = gimple_assign_lhs (use_stmt);
1609 use_type = TREE_TYPE (use_lhs);
1610 /* Support only type demotion or signedness change. */
1611 if (!INTEGRAL_TYPE_P (use_type)
1612 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1613 return NULL;
1615 /* Check that NEW_TYPE is not bigger than the conversion result. */
1616 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1617 return NULL;
1619 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1620 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1622 /* Create NEW_TYPE->USE_TYPE conversion. */
1623 new_oprnd = make_ssa_name (use_type);
1624 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1625 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1627 *type_in = get_vectype_for_scalar_type (new_type);
1628 *type_out = get_vectype_for_scalar_type (use_type);
1630 /* We created a pattern statement for the last statement in the
1631 sequence, so we don't need to associate it with the pattern
1632 statement created for PREV_STMT. Therefore, we add PREV_STMT
1633 to the list in order to mark it later in vect_pattern_recog_1. */
1634 if (prev_stmt)
1635 stmts->safe_push (prev_stmt);
1637 else
1639 if (prev_stmt)
1640 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1641 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1643 *type_in = vectype;
1644 *type_out = NULL_TREE;
1647 stmts->safe_push (use_stmt);
1649 else
1650 /* TODO: support general case, create a conversion to the correct type. */
1651 return NULL;
1653 /* Pattern detected. */
1654 if (dump_enabled_p ())
1656 dump_printf_loc (MSG_NOTE, vect_location,
1657 "vect_recog_over_widening_pattern: detected: ");
1658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1661 return pattern_stmt;
1664 /* Detect widening shift pattern:
1666 type a_t;
1667 TYPE a_T, res_T;
1669 S1 a_t = ;
1670 S2 a_T = (TYPE) a_t;
1671 S3 res_T = a_T << CONST;
1673 where type 'TYPE' is at least double the size of type 'type'.
1675 Also detect cases where the shift result is immediately converted
1676 to another type 'result_type' that is no larger in size than 'TYPE'.
1677 In those cases we perform a widen-shift that directly results in
1678 'result_type', to avoid a possible over-widening situation:
1680 type a_t;
1681 TYPE a_T, res_T;
1682 result_type res_result;
1684 S1 a_t = ;
1685 S2 a_T = (TYPE) a_t;
1686 S3 res_T = a_T << CONST;
1687 S4 res_result = (result_type) res_T;
1688 '--> res_result' = a_t w<< CONST;
1690 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1691 create an additional pattern stmt for S2 to create a variable of an
1692 intermediate type, and perform widen-shift on the intermediate type:
1694 type a_t;
1695 interm_type a_it;
1696 TYPE a_T, res_T, res_T';
1698 S1 a_t = ;
1699 S2 a_T = (TYPE) a_t;
1700 '--> a_it = (interm_type) a_t;
1701 S3 res_T = a_T << CONST;
1702 '--> res_T' = a_it <<* CONST;
1704 Input/Output:
1706 * STMTS: Contains a stmt from which the pattern search begins.
1707 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1708 in STMTS. When an intermediate type is used and a pattern statement is
1709 created for S2, we also put S2 here (before S3).
1711 Output:
1713 * TYPE_IN: The type of the input arguments to the pattern.
1715 * TYPE_OUT: The type of the output of this pattern.
1717 * Return value: A new stmt that will be used to replace the sequence of
1718 stmts that constitute the pattern. In this case it will be:
1719 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
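/* As an illustration, a source loop of the kind the widen-shift pattern is
   meant to match (an assumed example, shifting unsigned char values into
   unsigned short results) could be:

     unsigned char a[N];
     unsigned short r[N];
     for (int i = 0; i < N; i++)
       r[i] = a[i] << 4;

   The shift is performed after promotion and its result is stored in a
   wider type, which maps to WIDEN_LSHIFT_EXPR <a_t, 4>.  */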
1721 static gimple *
1722 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1723 tree *type_in, tree *type_out)
1725 gimple *last_stmt = stmts->pop ();
1726 gimple *def_stmt0;
1727 tree oprnd0, oprnd1;
1728 tree type, half_type0;
1729 gimple *pattern_stmt;
1730 tree vectype, vectype_out = NULL_TREE;
1731 tree var;
1732 enum tree_code dummy_code;
1733 int dummy_int;
1734 vec<tree> dummy_vec;
1735 gimple *use_stmt;
1736 bool promotion;
1738 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1739 return NULL;
1741 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1742 return NULL;
1744 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1745 return NULL;
1747 oprnd0 = gimple_assign_rhs1 (last_stmt);
1748 oprnd1 = gimple_assign_rhs2 (last_stmt);
1749 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1750 return NULL;
1752 /* Check operand 0: it has to be defined by a type promotion. */
1753 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1754 &promotion)
1755 || !promotion)
1756 return NULL;
1758 /* Check operand 1: has to be positive. We check that it fits the type
1759 in vect_handle_widen_op_by_const (). */
1760 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1761 return NULL;
1763 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1764 type = gimple_expr_type (last_stmt);
1766 /* Check for subsequent conversion to another type. */
1767 use_stmt = vect_single_imm_use (last_stmt);
1768 if (use_stmt && is_gimple_assign (use_stmt)
1769 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1770 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1772 tree use_lhs = gimple_assign_lhs (use_stmt);
1773 tree use_type = TREE_TYPE (use_lhs);
1775 if (INTEGRAL_TYPE_P (use_type)
1776 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1778 last_stmt = use_stmt;
1779 type = use_type;
1783 /* Check if this is a widening operation. */
1784 gimple *wstmt = NULL;
1785 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1786 &oprnd0, &wstmt,
1787 type, &half_type0, def_stmt0))
1788 return NULL;
1790 /* Pattern detected. */
1791 if (dump_enabled_p ())
1792 dump_printf_loc (MSG_NOTE, vect_location,
1793 "vect_recog_widen_shift_pattern: detected:\n");
1795 /* Check target support. */
1796 vectype = get_vectype_for_scalar_type (half_type0);
1797 vectype_out = get_vectype_for_scalar_type (type);
1799 if (!vectype
1800 || !vectype_out
1801 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1802 vectype_out, vectype,
1803 &dummy_code, &dummy_code,
1804 &dummy_int, &dummy_vec))
1805 return NULL;
1807 *type_in = vectype;
1808 *type_out = vectype_out;
1810 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1811 var = vect_recog_temp_ssa_var (type, NULL);
1812 pattern_stmt
1813 = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1814 if (wstmt)
1816 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1817 new_pattern_def_seq (stmt_vinfo, wstmt);
1818 stmt_vec_info new_stmt_info
1819 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1820 set_vinfo_for_stmt (wstmt, new_stmt_info);
1821 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1824 if (dump_enabled_p ())
1825 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1827 stmts->safe_push (last_stmt);
1828 return pattern_stmt;
1831 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1833 type a_t, b_t, c_t;
1835 S0 a_t = b_t r<< c_t;
1837 Input/Output:
1839 * STMTS: Contains a stmt from which the pattern search begins,
1840 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1841 with a sequence:
1843 S1 d_t = -c_t;
1844 S2 e_t = d_t & (B - 1);
1845 S3 f_t = b_t << c_t;
1846 S4 g_t = b_t >> e_t;
1847 S0 a_t = f_t | g_t;
1849 where B is the element bitsize of the type.
1851 Output:
1853 * TYPE_IN: The type of the input arguments to the pattern.
1855 * TYPE_OUT: The type of the output of this pattern.
1857 * Return value: A new stmt that will be used to replace the rotate
1858 S0 stmt. */
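/* For instance, with a 32-bit element type (B == 32) and c_t == 5 the
   replacement computes d_t = -5, e_t = (-5) & 31 == 27, f_t = b_t << 5,
   g_t = b_t >> 27 and a_t = f_t | g_t, which equals b_t r<< 5.
   A rough sketch of a scalar loop this targets, assuming earlier folding
   recognizes the open-coded idiom as a rotate:

     unsigned int x[N], y[N];
     for (int i = 0; i < N; i++)
       y[i] = (x[i] << (r & 31)) | (x[i] >> ((-r) & 31));
*/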
1860 static gimple *
1861 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1863 gimple *last_stmt = stmts->pop ();
1864 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1865 gimple *pattern_stmt, *def_stmt;
1866 enum tree_code rhs_code;
1867 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1868 vec_info *vinfo = stmt_vinfo->vinfo;
1869 enum vect_def_type dt;
1870 optab optab1, optab2;
1871 edge ext_def = NULL;
1873 if (!is_gimple_assign (last_stmt))
1874 return NULL;
1876 rhs_code = gimple_assign_rhs_code (last_stmt);
1877 switch (rhs_code)
1879 case LROTATE_EXPR:
1880 case RROTATE_EXPR:
1881 break;
1882 default:
1883 return NULL;
1886 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1887 return NULL;
1889 lhs = gimple_assign_lhs (last_stmt);
1890 oprnd0 = gimple_assign_rhs1 (last_stmt);
1891 type = TREE_TYPE (oprnd0);
1892 oprnd1 = gimple_assign_rhs2 (last_stmt);
1893 if (TREE_CODE (oprnd0) != SSA_NAME
1894 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1895 || !INTEGRAL_TYPE_P (type)
1896 || !TYPE_UNSIGNED (type))
1897 return NULL;
1899 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1900 return NULL;
1902 if (dt != vect_internal_def
1903 && dt != vect_constant_def
1904 && dt != vect_external_def)
1905 return NULL;
1907 vectype = get_vectype_for_scalar_type (type);
1908 if (vectype == NULL_TREE)
1909 return NULL;
1911 /* If vector/vector or vector/scalar rotate is supported by the target,
1912 don't do anything here. */
1913 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1914 if (optab1
1915 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1916 return NULL;
1918 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1920 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1921 if (optab2
1922 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1923 return NULL;
1926 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1927 don't do anything here either. */
1928 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1929 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1930 if (!optab1
1931 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1932 || !optab2
1933 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1935 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1936 return NULL;
1937 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1938 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1939 if (!optab1
1940 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1941 || !optab2
1942 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1943 return NULL;
1946 *type_in = vectype;
1947 *type_out = vectype;
1948 if (*type_in == NULL_TREE)
1949 return NULL;
1951 if (dt == vect_external_def
1952 && TREE_CODE (oprnd1) == SSA_NAME
1953 && is_a <loop_vec_info> (vinfo))
1955 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1956 ext_def = loop_preheader_edge (loop);
1957 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1959 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1960 if (bb == NULL
1961 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1962 ext_def = NULL;
1966 def = NULL_TREE;
1967 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1968 if (TREE_CODE (oprnd1) == INTEGER_CST
1969 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1970 def = oprnd1;
1971 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1973 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1974 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1975 && TYPE_PRECISION (TREE_TYPE (rhs1))
1976 == TYPE_PRECISION (type))
1977 def = rhs1;
1980 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1981 if (def == NULL_TREE)
1983 def = vect_recog_temp_ssa_var (type, NULL);
1984 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1985 if (ext_def)
1987 basic_block new_bb
1988 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1989 gcc_assert (!new_bb);
1991 else
1992 append_pattern_def_seq (stmt_vinfo, def_stmt);
1994 stype = TREE_TYPE (def);
1995 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1997 if (TREE_CODE (def) == INTEGER_CST)
1999 if (!tree_fits_uhwi_p (def)
2000 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2001 || integer_zerop (def))
2002 return NULL;
2003 def2 = build_int_cst (stype,
2004 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2006 else
2008 tree vecstype = get_vectype_for_scalar_type (stype);
2009 stmt_vec_info def_stmt_vinfo;
2011 if (vecstype == NULL_TREE)
2012 return NULL;
2013 def2 = vect_recog_temp_ssa_var (stype, NULL);
2014 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2015 if (ext_def)
2017 basic_block new_bb
2018 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2019 gcc_assert (!new_bb);
2021 else
2023 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2024 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2025 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2026 append_pattern_def_seq (stmt_vinfo, def_stmt);
2029 def2 = vect_recog_temp_ssa_var (stype, NULL);
2030 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2031 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2032 gimple_assign_lhs (def_stmt), mask);
2033 if (ext_def)
2035 basic_block new_bb
2036 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2037 gcc_assert (!new_bb);
2039 else
2041 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2042 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2043 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2044 append_pattern_def_seq (stmt_vinfo, def_stmt);
2048 var1 = vect_recog_temp_ssa_var (type, NULL);
2049 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2050 ? LSHIFT_EXPR : RSHIFT_EXPR,
2051 oprnd0, def);
2052 append_pattern_def_seq (stmt_vinfo, def_stmt);
2054 var2 = vect_recog_temp_ssa_var (type, NULL);
2055 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2056 ? RSHIFT_EXPR : LSHIFT_EXPR,
2057 oprnd0, def2);
2058 append_pattern_def_seq (stmt_vinfo, def_stmt);
2060 /* Pattern detected. */
2061 if (dump_enabled_p ())
2062 dump_printf_loc (MSG_NOTE, vect_location,
2063 "vect_recog_rotate_pattern: detected:\n");
2065 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2066 var = vect_recog_temp_ssa_var (type, NULL);
2067 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2069 if (dump_enabled_p ())
2070 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2072 stmts->safe_push (last_stmt);
2073 return pattern_stmt;
2076 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2077 vectorized:
2079 type a_t;
2080 TYPE b_T, res_T;
2082 S1 a_t = ;
2083 S2 b_T = ;
2084 S3 res_T = b_T op a_t;
2086 where type 'TYPE' is a type with different size than 'type',
2087 and op is <<, >> or rotate.
2089 Also detect cases:
2091 type a_t;
2092 TYPE b_T, c_T, res_T;
2094 S0 c_T = ;
2095 S1 a_t = (type) c_T;
2096 S2 b_T = ;
2097 S3 res_T = b_T op a_t;
2099 Input/Output:
2101 * STMTS: Contains a stmt from which the pattern search begins,
2102 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2103 with a shift/rotate which has same type on both operands, in the
2104 second case just b_T op c_T, in the first case with added cast
2105 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2107 Output:
2109 * TYPE_IN: The type of the input arguments to the pattern.
2111 * TYPE_OUT: The type of the output of this pattern.
2113 * Return value: A new stmt that will be used to replace the shift/rotate
2114 S3 stmt. */
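/* A typical source of this pattern (illustrative only) is a shift whose
   amount has a narrower type than the shifted value, e.g.

     long long x[N]; int amt[N];
     for (int i = 0; i < N; i++)
       x[i] = x[i] << amt[i];

   The pattern inserts a cast of the shift amount to the type of the
   shifted value, so a vector-by-vector shift with same-sized elements
   can be used.  */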
2116 static gimple *
2117 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2118 tree *type_in, tree *type_out)
2120 gimple *last_stmt = stmts->pop ();
2121 tree oprnd0, oprnd1, lhs, var;
2122 gimple *pattern_stmt, *def_stmt;
2123 enum tree_code rhs_code;
2124 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2125 vec_info *vinfo = stmt_vinfo->vinfo;
2126 enum vect_def_type dt;
2128 if (!is_gimple_assign (last_stmt))
2129 return NULL;
2131 rhs_code = gimple_assign_rhs_code (last_stmt);
2132 switch (rhs_code)
2134 case LSHIFT_EXPR:
2135 case RSHIFT_EXPR:
2136 case LROTATE_EXPR:
2137 case RROTATE_EXPR:
2138 break;
2139 default:
2140 return NULL;
2143 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2144 return NULL;
2146 lhs = gimple_assign_lhs (last_stmt);
2147 oprnd0 = gimple_assign_rhs1 (last_stmt);
2148 oprnd1 = gimple_assign_rhs2 (last_stmt);
2149 if (TREE_CODE (oprnd0) != SSA_NAME
2150 || TREE_CODE (oprnd1) != SSA_NAME
2151 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2152 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2153 || TYPE_PRECISION (TREE_TYPE (lhs))
2154 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2155 return NULL;
2157 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2158 return NULL;
2160 if (dt != vect_internal_def)
2161 return NULL;
2163 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2164 *type_out = *type_in;
2165 if (*type_in == NULL_TREE)
2166 return NULL;
2168 tree def = NULL_TREE;
2169 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2170 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2172 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2173 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2174 && TYPE_PRECISION (TREE_TYPE (rhs1))
2175 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2177 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2178 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2179 def = rhs1;
2180 else
2182 tree mask
2183 = build_low_bits_mask (TREE_TYPE (rhs1),
2184 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2185 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2186 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2187 new_pattern_def_seq (stmt_vinfo, def_stmt);
2192 if (def == NULL_TREE)
2194 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2195 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2196 new_pattern_def_seq (stmt_vinfo, def_stmt);
2199 /* Pattern detected. */
2200 if (dump_enabled_p ())
2201 dump_printf_loc (MSG_NOTE, vect_location,
2202 "vect_recog_vector_vector_shift_pattern: detected:\n");
2204 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2205 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2206 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2208 if (dump_enabled_p ())
2209 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2211 stmts->safe_push (last_stmt);
2212 return pattern_stmt;
2215 /* Return true iff the target has a vector optab implementing the operation
2216 CODE on type VECTYPE. */
2218 static bool
2219 target_has_vecop_for_code (tree_code code, tree vectype)
2221 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2222 return voptab
2223 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2226 /* Verify that the target has optabs of VECTYPE to perform all the steps
2227 needed by the multiplication-by-immediate synthesis algorithm described by
2228 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2229 present. Return true iff the target supports all the steps. */
2231 static bool
2232 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2233 tree vectype, bool synth_shift_p)
2235 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2236 return false;
2238 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2239 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2241 if (var == negate_variant
2242 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2243 return false;
2245 /* If we must synthesize shifts with additions make sure that vector
2246 addition is available. */
2247 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2248 return false;
2250 for (int i = 1; i < alg->ops; i++)
2252 switch (alg->op[i])
2254 case alg_shift:
2255 break;
2256 case alg_add_t_m2:
2257 case alg_add_t2_m:
2258 case alg_add_factor:
2259 if (!supports_vplus)
2260 return false;
2261 break;
2262 case alg_sub_t_m2:
2263 case alg_sub_t2_m:
2264 case alg_sub_factor:
2265 if (!supports_vminus)
2266 return false;
2267 break;
2268 case alg_unknown:
2269 case alg_m:
2270 case alg_zero:
2271 case alg_impossible:
2272 return false;
2273 default:
2274 gcc_unreachable ();
2278 return true;
2281 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2282 putting the final result in DEST. Append all statements but the last into
2283 VINFO. Return the last statement. */
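/* For example, a left shift by 3 (AMNT == 3) is emitted as three
   doublings: t1 = op + op, t2 = t1 + t1, dest = t2 + t2; the first two
   stmts are appended to VINFO and the last one is returned.  */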
2285 static gimple *
2286 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2287 stmt_vec_info vinfo)
2289 HOST_WIDE_INT i;
2290 tree itype = TREE_TYPE (op);
2291 tree prev_res = op;
2292 gcc_assert (amnt >= 0);
2293 for (i = 0; i < amnt; i++)
2295 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2296 : dest;
2297 gimple *stmt
2298 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2299 prev_res = tmp_var;
2300 if (i < amnt - 1)
2301 append_pattern_def_seq (vinfo, stmt);
2302 else
2303 return stmt;
2305 gcc_unreachable ();
2306 return NULL;
2309 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2310 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2311 the process if necessary. Append the resulting assignment statements
2312 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2313 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2314 left shifts using additions. */
2316 static tree
2317 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2318 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2320 if (integer_zerop (op2)
2321 && (code == LSHIFT_EXPR
2322 || code == PLUS_EXPR))
2324 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2325 return op1;
2328 gimple *stmt;
2329 tree itype = TREE_TYPE (op1);
2330 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2332 if (code == LSHIFT_EXPR
2333 && synth_shift_p)
2335 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2336 stmt_vinfo);
2337 append_pattern_def_seq (stmt_vinfo, stmt);
2338 return tmp_var;
2341 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2342 append_pattern_def_seq (stmt_vinfo, stmt);
2343 return tmp_var;
2346 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2347 and simple arithmetic operations to be vectorized. Record the statements
2348 produced in STMT_VINFO and return the last statement in the sequence or
2349 NULL if it's not possible to synthesize such a multiplication.
2350 This function mirrors the behavior of expand_mult_const in expmed.c but
2351 works on tree-ssa form. */
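/* As a rough example, a multiplication by 9 is typically synthesized as
     t1 = op << 3;
     res = t1 + op;
   and, when the target lacks vector shifts, the shift itself is further
   expanded into additions by synth_lshift_by_additions.  */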
2353 static gimple *
2354 vect_synth_mult_by_constant (tree op, tree val,
2355 stmt_vec_info stmt_vinfo)
2357 tree itype = TREE_TYPE (op);
2358 machine_mode mode = TYPE_MODE (itype);
2359 struct algorithm alg;
2360 mult_variant variant;
2361 if (!tree_fits_shwi_p (val))
2362 return NULL;
2364 /* Multiplication synthesis by shifts, adds and subs can introduce
2365 signed overflow where the original operation didn't. Perform the
2366 operations on an unsigned type and cast back to avoid this.
2367 In the future we may want to relax this for synthesis algorithms
2368 that we can prove do not cause unexpected overflow. */
2369 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2371 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2373 /* Targets that don't support vector shifts but support vector additions
2374 can synthesize shifts that way. */
2375 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2377 HOST_WIDE_INT hwval = tree_to_shwi (val);
2378 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2379 The vectorizer's benefit analysis will decide whether it's beneficial
2380 to do this. */
2381 bool possible = choose_mult_variant (mode, hwval, &alg,
2382 &variant, MAX_COST);
2383 if (!possible)
2384 return NULL;
2386 tree vectype = get_vectype_for_scalar_type (multtype);
2388 if (!vectype
2389 || !target_supports_mult_synth_alg (&alg, variant,
2390 vectype, synth_shift_p))
2391 return NULL;
2393 tree accumulator;
2395 /* Clear out the sequence of statements so we can populate it below. */
2396 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2397 gimple *stmt = NULL;
2399 if (cast_to_unsigned_p)
2401 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2402 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2403 append_pattern_def_seq (stmt_vinfo, stmt);
2404 op = tmp_op;
2407 if (alg.op[0] == alg_zero)
2408 accumulator = build_int_cst (multtype, 0);
2409 else
2410 accumulator = op;
2412 bool needs_fixup = (variant == negate_variant)
2413 || (variant == add_variant);
2415 for (int i = 1; i < alg.ops; i++)
2417 tree shft_log = build_int_cst (multtype, alg.log[i]);
2418 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2419 tree tmp_var = NULL_TREE;
2421 switch (alg.op[i])
2423 case alg_shift:
2424 if (synth_shift_p)
2425 stmt
2426 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2427 stmt_vinfo);
2428 else
2429 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2430 shft_log);
2431 break;
2432 case alg_add_t_m2:
2433 tmp_var
2434 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2435 stmt_vinfo, synth_shift_p);
2436 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2437 tmp_var);
2438 break;
2439 case alg_sub_t_m2:
2440 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2441 shft_log, stmt_vinfo,
2442 synth_shift_p);
2443 /* In some algorithms the first step involves zeroing the
2444 accumulator. If subtracting from such an accumulator
2445 just emit the negation directly. */
2446 if (integer_zerop (accumulator))
2447 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2448 else
2449 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2450 tmp_var);
2451 break;
2452 case alg_add_t2_m:
2453 tmp_var
2454 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2455 stmt_vinfo, synth_shift_p);
2456 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2457 break;
2458 case alg_sub_t2_m:
2459 tmp_var
2460 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2461 stmt_vinfo, synth_shift_p);
2462 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2463 break;
2464 case alg_add_factor:
2465 tmp_var
2466 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2467 stmt_vinfo, synth_shift_p);
2468 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2469 tmp_var);
2470 break;
2471 case alg_sub_factor:
2472 tmp_var
2473 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2474 stmt_vinfo, synth_shift_p);
2475 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2476 accumulator);
2477 break;
2478 default:
2479 gcc_unreachable ();
2481 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2482 but rather return it directly. */
2484 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2485 append_pattern_def_seq (stmt_vinfo, stmt);
2486 accumulator = accum_tmp;
2488 if (variant == negate_variant)
2490 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2491 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2492 accumulator = accum_tmp;
2493 if (cast_to_unsigned_p)
2494 append_pattern_def_seq (stmt_vinfo, stmt);
2496 else if (variant == add_variant)
2498 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2499 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2500 accumulator = accum_tmp;
2501 if (cast_to_unsigned_p)
2502 append_pattern_def_seq (stmt_vinfo, stmt);
2504 /* Move back to a signed type if needed. */
2505 if (cast_to_unsigned_p)
2507 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2508 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2511 return stmt;
2514 /* Detect multiplication by a constant and convert it into a sequence of
2515 shifts, additions, subtractions and negations. We reuse the
2516 choose_mult_variant algorithms from expmed.c.
2518 Input/Output:
2520 STMTS: Contains a stmt from which the pattern search begins,
2521 i.e. the mult stmt.
2523 Output:
2525 * TYPE_IN: The type of the input arguments to the pattern.
2527 * TYPE_OUT: The type of the output of this pattern.
2529 * Return value: A new stmt that will be used to replace
2530 the multiplication. */
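/* For example (a sketch), y = x * -5 may be synthesized, when the target
   has no vector multiply but supports vector shifts, additions and
   negation, as
     t1 = x << 2;
     t2 = x + t1;
     y  = -t2;
   using the negate_variant fixup from choose_mult_variant.  */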
2532 static gimple *
2533 vect_recog_mult_pattern (vec<gimple *> *stmts,
2534 tree *type_in, tree *type_out)
2536 gimple *last_stmt = stmts->pop ();
2537 tree oprnd0, oprnd1, vectype, itype;
2538 gimple *pattern_stmt;
2539 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2541 if (!is_gimple_assign (last_stmt))
2542 return NULL;
2544 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2545 return NULL;
2547 oprnd0 = gimple_assign_rhs1 (last_stmt);
2548 oprnd1 = gimple_assign_rhs2 (last_stmt);
2549 itype = TREE_TYPE (oprnd0);
2551 if (TREE_CODE (oprnd0) != SSA_NAME
2552 || TREE_CODE (oprnd1) != INTEGER_CST
2553 || !INTEGRAL_TYPE_P (itype)
2554 || !type_has_mode_precision_p (itype))
2555 return NULL;
2557 vectype = get_vectype_for_scalar_type (itype);
2558 if (vectype == NULL_TREE)
2559 return NULL;
2561 /* If the target can handle vectorized multiplication natively,
2562 don't attempt to optimize this. */
2563 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2564 if (mul_optab != unknown_optab)
2566 machine_mode vec_mode = TYPE_MODE (vectype);
2567 int icode = (int) optab_handler (mul_optab, vec_mode);
2568 if (icode != CODE_FOR_nothing)
2569 return NULL;
2572 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2573 if (!pattern_stmt)
2574 return NULL;
2576 /* Pattern detected. */
2577 if (dump_enabled_p ())
2578 dump_printf_loc (MSG_NOTE, vect_location,
2579 "vect_recog_mult_pattern: detected:\n");
2581 if (dump_enabled_p ())
2582 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2583 pattern_stmt, 0);
2585 stmts->safe_push (last_stmt);
2586 *type_in = vectype;
2587 *type_out = vectype;
2589 return pattern_stmt;
2592 /* Detect a signed division by a constant that wouldn't be
2593 otherwise vectorized:
2595 type a_t, b_t;
2597 S1 a_t = b_t / N;
2599 where type 'type' is an integral type and N is a constant.
2601 Similarly handle modulo by a constant:
2603 S4 a_t = b_t % N;
2605 Input/Output:
2607 * STMTS: Contains a stmt from which the pattern search begins,
2608 i.e. the division stmt. S1 is replaced, if N is a power
2609 of two constant and type is signed, by:
2610 S3 y_t = b_t < 0 ? N - 1 : 0;
2611 S2 x_t = b_t + y_t;
2612 S1' a_t = x_t >> log2 (N);
2614 S4 is replaced, if N is a power of two constant and
2615 type is signed, by (where *_T temporaries have unsigned type):
2616 S9 y_T = b_t < 0 ? -1U : 0U;
2617 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2618 S7 z_t = (type) z_T;
2619 S6 w_t = b_t + z_t;
2620 S5 x_t = w_t & (N - 1);
2621 S4' a_t = x_t - z_t;
2623 Output:
2625 * TYPE_IN: The type of the input arguments to the pattern.
2627 * TYPE_OUT: The type of the output of this pattern.
2629 * Return value: A new stmt that will be used to replace the division
2630 S1 or modulo S4 stmt. */
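/* Worked example for the power-of-two case: for a_t = b_t / 4 with
   b_t == -7 the replacement computes y_t = 3 (since b_t < 0),
   x_t = -7 + 3 = -4 and a_t = -4 >> 2 = -1, matching truncating
   division; without the bias a plain arithmetic shift would give
   -7 >> 2 = -2.  */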
2632 static gimple *
2633 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2634 tree *type_in, tree *type_out)
2636 gimple *last_stmt = stmts->pop ();
2637 tree oprnd0, oprnd1, vectype, itype, cond;
2638 gimple *pattern_stmt, *def_stmt;
2639 enum tree_code rhs_code;
2640 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2641 vec_info *vinfo = stmt_vinfo->vinfo;
2642 optab optab;
2643 tree q;
2644 int dummy_int, prec;
2645 stmt_vec_info def_stmt_vinfo;
2647 if (!is_gimple_assign (last_stmt))
2648 return NULL;
2650 rhs_code = gimple_assign_rhs_code (last_stmt);
2651 switch (rhs_code)
2653 case TRUNC_DIV_EXPR:
2654 case TRUNC_MOD_EXPR:
2655 break;
2656 default:
2657 return NULL;
2660 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2661 return NULL;
2663 oprnd0 = gimple_assign_rhs1 (last_stmt);
2664 oprnd1 = gimple_assign_rhs2 (last_stmt);
2665 itype = TREE_TYPE (oprnd0);
2666 if (TREE_CODE (oprnd0) != SSA_NAME
2667 || TREE_CODE (oprnd1) != INTEGER_CST
2668 || TREE_CODE (itype) != INTEGER_TYPE
2669 || !type_has_mode_precision_p (itype))
2670 return NULL;
2672 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2673 vectype = get_vectype_for_scalar_type (itype);
2674 if (vectype == NULL_TREE)
2675 return NULL;
2677 /* If the target can handle vectorized division or modulo natively,
2678 don't attempt to optimize this. */
2679 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2680 if (optab != unknown_optab)
2682 machine_mode vec_mode = TYPE_MODE (vectype);
2683 int icode = (int) optab_handler (optab, vec_mode);
2684 if (icode != CODE_FOR_nothing)
2685 return NULL;
2688 prec = TYPE_PRECISION (itype);
2689 if (integer_pow2p (oprnd1))
2691 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2692 return NULL;
2694 /* Pattern detected. */
2695 if (dump_enabled_p ())
2696 dump_printf_loc (MSG_NOTE, vect_location,
2697 "vect_recog_divmod_pattern: detected:\n");
2699 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2700 build_int_cst (itype, 0));
2701 if (rhs_code == TRUNC_DIV_EXPR)
2703 tree var = vect_recog_temp_ssa_var (itype, NULL);
2704 tree shift;
2705 def_stmt
2706 = gimple_build_assign (var, COND_EXPR, cond,
2707 fold_build2 (MINUS_EXPR, itype, oprnd1,
2708 build_int_cst (itype, 1)),
2709 build_int_cst (itype, 0));
2710 new_pattern_def_seq (stmt_vinfo, def_stmt);
2711 var = vect_recog_temp_ssa_var (itype, NULL);
2712 def_stmt
2713 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2714 gimple_assign_lhs (def_stmt));
2715 append_pattern_def_seq (stmt_vinfo, def_stmt);
2717 shift = build_int_cst (itype, tree_log2 (oprnd1));
2718 pattern_stmt
2719 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2720 RSHIFT_EXPR, var, shift);
2722 else
2724 tree signmask;
2725 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2726 if (compare_tree_int (oprnd1, 2) == 0)
2728 signmask = vect_recog_temp_ssa_var (itype, NULL);
2729 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2730 build_int_cst (itype, 1),
2731 build_int_cst (itype, 0));
2732 append_pattern_def_seq (stmt_vinfo, def_stmt);
2734 else
2736 tree utype
2737 = build_nonstandard_integer_type (prec, 1);
2738 tree vecutype = get_vectype_for_scalar_type (utype);
2739 tree shift
2740 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2741 - tree_log2 (oprnd1));
2742 tree var = vect_recog_temp_ssa_var (utype, NULL);
2744 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2745 build_int_cst (utype, -1),
2746 build_int_cst (utype, 0));
2747 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2748 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2749 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2750 append_pattern_def_seq (stmt_vinfo, def_stmt);
2751 var = vect_recog_temp_ssa_var (utype, NULL);
2752 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2753 gimple_assign_lhs (def_stmt),
2754 shift);
2755 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2756 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2757 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2758 append_pattern_def_seq (stmt_vinfo, def_stmt);
2759 signmask = vect_recog_temp_ssa_var (itype, NULL);
2760 def_stmt
2761 = gimple_build_assign (signmask, NOP_EXPR, var);
2762 append_pattern_def_seq (stmt_vinfo, def_stmt);
2764 def_stmt
2765 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2766 PLUS_EXPR, oprnd0, signmask);
2767 append_pattern_def_seq (stmt_vinfo, def_stmt);
2768 def_stmt
2769 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2770 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2771 fold_build2 (MINUS_EXPR, itype, oprnd1,
2772 build_int_cst (itype, 1)));
2773 append_pattern_def_seq (stmt_vinfo, def_stmt);
2775 pattern_stmt
2776 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2777 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2778 signmask);
2781 if (dump_enabled_p ())
2782 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2785 stmts->safe_push (last_stmt);
2787 *type_in = vectype;
2788 *type_out = vectype;
2789 return pattern_stmt;
2792 if (prec > HOST_BITS_PER_WIDE_INT
2793 || integer_zerop (oprnd1))
2794 return NULL;
2796 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2797 return NULL;
2799 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2801 if (TYPE_UNSIGNED (itype))
2803 unsigned HOST_WIDE_INT mh, ml;
2804 int pre_shift, post_shift;
2805 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2806 & GET_MODE_MASK (itype_mode));
2807 tree t1, t2, t3, t4;
2809 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2810 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2811 return NULL;
2813 /* Find a suitable multiplier and right shift count
2814 instead of multiplying with D. */
2815 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2817 /* If the suggested multiplier is more than SIZE bits, we can do better
2818 for even divisors, using an initial right shift. */
2819 if (mh != 0 && (d & 1) == 0)
2821 pre_shift = ctz_or_zero (d);
2822 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2823 &ml, &post_shift, &dummy_int);
2824 gcc_assert (!mh);
2826 else
2827 pre_shift = 0;
2829 if (mh != 0)
2831 if (post_shift - 1 >= prec)
2832 return NULL;
2834 /* t1 = oprnd0 h* ml;
2835 t2 = oprnd0 - t1;
2836 t3 = t2 >> 1;
2837 t4 = t1 + t3;
2838 q = t4 >> (post_shift - 1); */
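/* For instance, for a 32-bit unsigned division by 7 choose_multiplier
   yields ml == 0x24924925 with post_shift == 3, so the emitted sequence
   is q = (t1 + ((oprnd0 - t1) >> 1)) >> 2 with
   t1 = oprnd0 h* 0x24924925 (the classic highpart-multiply fixup).  */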
2839 t1 = vect_recog_temp_ssa_var (itype, NULL);
2840 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2841 build_int_cst (itype, ml));
2842 append_pattern_def_seq (stmt_vinfo, def_stmt);
2844 t2 = vect_recog_temp_ssa_var (itype, NULL);
2845 def_stmt
2846 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2847 append_pattern_def_seq (stmt_vinfo, def_stmt);
2849 t3 = vect_recog_temp_ssa_var (itype, NULL);
2850 def_stmt
2851 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2852 append_pattern_def_seq (stmt_vinfo, def_stmt);
2854 t4 = vect_recog_temp_ssa_var (itype, NULL);
2855 def_stmt
2856 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2858 if (post_shift != 1)
2860 append_pattern_def_seq (stmt_vinfo, def_stmt);
2862 q = vect_recog_temp_ssa_var (itype, NULL);
2863 pattern_stmt
2864 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2865 build_int_cst (itype, post_shift - 1));
2867 else
2869 q = t4;
2870 pattern_stmt = def_stmt;
2873 else
2875 if (pre_shift >= prec || post_shift >= prec)
2876 return NULL;
2878 /* t1 = oprnd0 >> pre_shift;
2879 t2 = t1 h* ml;
2880 q = t2 >> post_shift; */
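/* E.g. for a 32-bit unsigned division by 3 this path uses
   ml == 0xAAAAAAAB and post_shift == 1, i.e.
   q = (oprnd0 h* 0xAAAAAAAB) >> 1.  */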
2881 if (pre_shift)
2883 t1 = vect_recog_temp_ssa_var (itype, NULL);
2884 def_stmt
2885 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2886 build_int_cst (NULL, pre_shift));
2887 append_pattern_def_seq (stmt_vinfo, def_stmt);
2889 else
2890 t1 = oprnd0;
2892 t2 = vect_recog_temp_ssa_var (itype, NULL);
2893 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2894 build_int_cst (itype, ml));
2896 if (post_shift)
2898 append_pattern_def_seq (stmt_vinfo, def_stmt);
2900 q = vect_recog_temp_ssa_var (itype, NULL);
2901 def_stmt
2902 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2903 build_int_cst (itype, post_shift));
2905 else
2906 q = t2;
2908 pattern_stmt = def_stmt;
2911 else
2913 unsigned HOST_WIDE_INT ml;
2914 int post_shift;
2915 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2916 unsigned HOST_WIDE_INT abs_d;
2917 bool add = false;
2918 tree t1, t2, t3, t4;
2920 /* Give up for -1. */
2921 if (d == -1)
2922 return NULL;
2924 /* Since d might be INT_MIN, we have to cast to
2925 unsigned HOST_WIDE_INT before negating to avoid
2926 undefined signed overflow. */
2927 abs_d = (d >= 0
2928 ? (unsigned HOST_WIDE_INT) d
2929 : - (unsigned HOST_WIDE_INT) d);
2931 /* n rem d = n rem -d */
2932 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2934 d = abs_d;
2935 oprnd1 = build_int_cst (itype, abs_d);
2937 else if (HOST_BITS_PER_WIDE_INT >= prec
2938 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2939 /* This case is not handled correctly below. */
2940 return NULL;
2942 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2943 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2945 add = true;
2946 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2948 if (post_shift >= prec)
2949 return NULL;
2951 /* t1 = oprnd0 h* ml; */
2952 t1 = vect_recog_temp_ssa_var (itype, NULL);
2953 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2954 build_int_cst (itype, ml));
2956 if (add)
2958 /* t2 = t1 + oprnd0; */
2959 append_pattern_def_seq (stmt_vinfo, def_stmt);
2960 t2 = vect_recog_temp_ssa_var (itype, NULL);
2961 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2963 else
2964 t2 = t1;
2966 if (post_shift)
2968 /* t3 = t2 >> post_shift; */
2969 append_pattern_def_seq (stmt_vinfo, def_stmt);
2970 t3 = vect_recog_temp_ssa_var (itype, NULL);
2971 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2972 build_int_cst (itype, post_shift));
2974 else
2975 t3 = t2;
2977 wide_int oprnd0_min, oprnd0_max;
2978 int msb = 1;
2979 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2981 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2982 msb = 0;
2983 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2984 msb = -1;
2987 if (msb == 0 && d >= 0)
2989 /* q = t3; */
2990 q = t3;
2991 pattern_stmt = def_stmt;
2993 else
2995 /* t4 = oprnd0 >> (prec - 1);
2996 or if we know from VRP that oprnd0 >= 0
2997 t4 = 0;
2998 or if we know from VRP that oprnd0 < 0
2999 t4 = -1; */
3000 append_pattern_def_seq (stmt_vinfo, def_stmt);
3001 t4 = vect_recog_temp_ssa_var (itype, NULL);
3002 if (msb != 1)
3003 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3004 build_int_cst (itype, msb));
3005 else
3006 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3007 build_int_cst (itype, prec - 1));
3008 append_pattern_def_seq (stmt_vinfo, def_stmt);
3010 /* q = t3 - t4; or q = t4 - t3; */
3011 q = vect_recog_temp_ssa_var (itype, NULL);
3012 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3013 d < 0 ? t3 : t4);
3017 if (rhs_code == TRUNC_MOD_EXPR)
3019 tree r, t1;
3021 /* We divided. Now finish by:
3022 t1 = q * oprnd1;
3023 r = oprnd0 - t1; */
3024 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3026 t1 = vect_recog_temp_ssa_var (itype, NULL);
3027 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3028 append_pattern_def_seq (stmt_vinfo, def_stmt);
3030 r = vect_recog_temp_ssa_var (itype, NULL);
3031 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3034 /* Pattern detected. */
3035 if (dump_enabled_p ())
3037 dump_printf_loc (MSG_NOTE, vect_location,
3038 "vect_recog_divmod_pattern: detected: ");
3039 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3042 stmts->safe_push (last_stmt);
3044 *type_in = vectype;
3045 *type_out = vectype;
3046 return pattern_stmt;
3049 /* Function vect_recog_mixed_size_cond_pattern
3051 Try to find the following pattern:
3053 type x_t, y_t;
3054 TYPE a_T, b_T, c_T;
3055 loop:
3056 S1 a_T = x_t CMP y_t ? b_T : c_T;
3058 where type 'TYPE' is an integral type which has different size
3059 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3060 than 'type', the constants need to fit into an integer type
3061 with the same width as 'type') or results of conversion from 'type'.
3063 Input:
3065 * LAST_STMT: A stmt from which the pattern search begins.
3067 Output:
3069 * TYPE_IN: The type of the input arguments to the pattern.
3071 * TYPE_OUT: The type of the output of this pattern.
3073 * Return value: A new stmt that will be used to replace the pattern.
3074 Additionally a def_stmt is added.
3076 a_it = x_t CMP y_t ? b_it : c_it;
3077 a_T = (TYPE) a_it; */
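/* For example (a sketch): with "short x_t, y_t; int a_T;" and
   S1 a_T = x_t < y_t ? 3 : 5;
   the pattern emits a short-typed COND_EXPR
   a_it = x_t < y_t ? 3 : 5;
   followed by a_T = (int) a_it, so the VEC_COND_EXPR operates on
   elements of the same size as the comparison.  */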
3079 static gimple *
3080 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3081 tree *type_out)
3083 gimple *last_stmt = (*stmts)[0];
3084 tree cond_expr, then_clause, else_clause;
3085 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3086 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3087 gimple *pattern_stmt, *def_stmt;
3088 vec_info *vinfo = stmt_vinfo->vinfo;
3089 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3090 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3091 bool promotion;
3092 tree comp_scalar_type;
3094 if (!is_gimple_assign (last_stmt)
3095 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3096 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3097 return NULL;
3099 cond_expr = gimple_assign_rhs1 (last_stmt);
3100 then_clause = gimple_assign_rhs2 (last_stmt);
3101 else_clause = gimple_assign_rhs3 (last_stmt);
3103 if (!COMPARISON_CLASS_P (cond_expr))
3104 return NULL;
3106 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3107 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3108 if (comp_vectype == NULL_TREE)
3109 return NULL;
3111 type = gimple_expr_type (last_stmt);
3112 if (types_compatible_p (type, comp_scalar_type)
3113 || ((TREE_CODE (then_clause) != INTEGER_CST
3114 || TREE_CODE (else_clause) != INTEGER_CST)
3115 && !INTEGRAL_TYPE_P (comp_scalar_type))
3116 || !INTEGRAL_TYPE_P (type))
3117 return NULL;
3119 if ((TREE_CODE (then_clause) != INTEGER_CST
3120 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3121 &def_stmt0, &promotion))
3122 || (TREE_CODE (else_clause) != INTEGER_CST
3123 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3124 &def_stmt1, &promotion)))
3125 return NULL;
3127 if (orig_type0 && orig_type1
3128 && !types_compatible_p (orig_type0, orig_type1))
3129 return NULL;
3131 if (orig_type0)
3133 if (!types_compatible_p (orig_type0, comp_scalar_type))
3134 return NULL;
3135 then_clause = gimple_assign_rhs1 (def_stmt0);
3136 itype = orig_type0;
3139 if (orig_type1)
3141 if (!types_compatible_p (orig_type1, comp_scalar_type))
3142 return NULL;
3143 else_clause = gimple_assign_rhs1 (def_stmt1);
3144 itype = orig_type1;
3148 HOST_WIDE_INT cmp_mode_size
3149 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3151 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3152 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3153 return NULL;
3155 vectype = get_vectype_for_scalar_type (type);
3156 if (vectype == NULL_TREE)
3157 return NULL;
3159 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3160 return NULL;
3162 if (itype == NULL_TREE)
3163 itype = build_nonstandard_integer_type (cmp_mode_size,
3164 TYPE_UNSIGNED (type));
3166 if (itype == NULL_TREE
3167 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3168 return NULL;
3170 vecitype = get_vectype_for_scalar_type (itype);
3171 if (vecitype == NULL_TREE)
3172 return NULL;
3174 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3175 return NULL;
3177 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3179 if ((TREE_CODE (then_clause) == INTEGER_CST
3180 && !int_fits_type_p (then_clause, itype))
3181 || (TREE_CODE (else_clause) == INTEGER_CST
3182 && !int_fits_type_p (else_clause, itype)))
3183 return NULL;
3186 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3187 COND_EXPR, unshare_expr (cond_expr),
3188 fold_convert (itype, then_clause),
3189 fold_convert (itype, else_clause));
3190 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3191 NOP_EXPR, gimple_assign_lhs (def_stmt));
3193 new_pattern_def_seq (stmt_vinfo, def_stmt);
3194 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3195 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3196 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3197 *type_in = vecitype;
3198 *type_out = vectype;
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_NOTE, vect_location,
3202 "vect_recog_mixed_size_cond_pattern: detected:\n");
3204 return pattern_stmt;
3208 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3209 true if bool VAR can and should be optimized that way. Assume it shouldn't
3210 in case it's a result of a comparison which can be directly vectorized into
3211 a vector comparison. Fills in STMTS with all stmts visited during the
3212 walk. */
3214 static bool
3215 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3217 gimple *def_stmt;
3218 enum vect_def_type dt;
3219 tree rhs1;
3220 enum tree_code rhs_code;
3222 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3223 return false;
3225 if (dt != vect_internal_def)
3226 return false;
3228 if (!is_gimple_assign (def_stmt))
3229 return false;
3231 if (stmts.contains (def_stmt))
3232 return true;
3234 rhs1 = gimple_assign_rhs1 (def_stmt);
3235 rhs_code = gimple_assign_rhs_code (def_stmt);
3236 switch (rhs_code)
3238 case SSA_NAME:
3239 if (! check_bool_pattern (rhs1, vinfo, stmts))
3240 return false;
3241 break;
3243 CASE_CONVERT:
3244 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3245 return false;
3246 if (! check_bool_pattern (rhs1, vinfo, stmts))
3247 return false;
3248 break;
3250 case BIT_NOT_EXPR:
3251 if (! check_bool_pattern (rhs1, vinfo, stmts))
3252 return false;
3253 break;
3255 case BIT_AND_EXPR:
3256 case BIT_IOR_EXPR:
3257 case BIT_XOR_EXPR:
3258 if (! check_bool_pattern (rhs1, vinfo, stmts)
3259 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3260 return false;
3261 break;
3263 default:
3264 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3266 tree vecitype, comp_vectype;
3268 /* If the comparison can throw, then is_gimple_condexpr will be
3269 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3270 if (stmt_could_throw_p (def_stmt))
3271 return false;
3273 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3274 if (comp_vectype == NULL_TREE)
3275 return false;
3277 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3278 if (mask_type
3279 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3280 return false;
3282 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3284 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3285 tree itype
3286 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3287 vecitype = get_vectype_for_scalar_type (itype);
3288 if (vecitype == NULL_TREE)
3289 return false;
3291 else
3292 vecitype = comp_vectype;
3293 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3294 return false;
3296 else
3297 return false;
3298 break;
3301 bool res = stmts.add (def_stmt);
3302 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3303 gcc_assert (!res);
3305 return true;
3309 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of the result
3310 of a previous stmt (SSA_NAME_DEF_STMT of VAR), appending the cast stmt
3311 to STMT_INFO's pattern sequence. */
3313 static tree
3314 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3316 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3317 NOP_EXPR, var);
3318 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3319 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3320 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3321 append_pattern_def_seq (stmt_info, cast_stmt);
3322 return gimple_assign_lhs (cast_stmt);
3325 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3326 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3327 type, OUT_TYPE is the desired final integer type of the whole pattern.
3328 STMT_INFO is the info of the pattern root, with which the pattern stmts
3329 should be associated. DEFS is a map of pattern defs. */
3331 static void
3332 adjust_bool_pattern (tree var, tree out_type,
3333 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3335 gimple *stmt = SSA_NAME_DEF_STMT (var);
3336 enum tree_code rhs_code, def_rhs_code;
3337 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3338 location_t loc;
3339 gimple *pattern_stmt, *def_stmt;
3340 tree trueval = NULL_TREE;
3342 rhs1 = gimple_assign_rhs1 (stmt);
3343 rhs2 = gimple_assign_rhs2 (stmt);
3344 rhs_code = gimple_assign_rhs_code (stmt);
3345 loc = gimple_location (stmt);
3346 switch (rhs_code)
3348 case SSA_NAME:
3349 CASE_CONVERT:
3350 irhs1 = *defs.get (rhs1);
3351 itype = TREE_TYPE (irhs1);
3352 pattern_stmt
3353 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3354 SSA_NAME, irhs1);
3355 break;
3357 case BIT_NOT_EXPR:
3358 irhs1 = *defs.get (rhs1);
3359 itype = TREE_TYPE (irhs1);
3360 pattern_stmt
3361 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3362 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3363 break;
3365 case BIT_AND_EXPR:
3366 /* Try to optimize x = y & (a < b ? 1 : 0); into
3367 x = (a < b ? y : 0);
3369 E.g. for:
3370 bool a_b, b_b, c_b;
3371 TYPE d_T;
3373 S1 a_b = x1 CMP1 y1;
3374 S2 b_b = x2 CMP2 y2;
3375 S3 c_b = a_b & b_b;
3376 S4 d_T = (TYPE) c_b;
3378 we would normally emit:
3380 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3381 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3382 S3' c_T = a_T & b_T;
3383 S4' d_T = c_T;
3385 but we can save one stmt by using the
3386 result of one of the COND_EXPRs in the other COND_EXPR and leave
3387 BIT_AND_EXPR stmt out:
3389 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3390 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3391 S4' f_T = c_T;
3393 At least when VEC_COND_EXPR is implemented using masks,
3394 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3395 computes the comparison mask and ANDs it, in one case with
3396 an all-ones vector, in the other case with a vector register.
3397 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3398 often more expensive. */
3399 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3400 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3401 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3403 irhs1 = *defs.get (rhs1);
3404 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3405 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3406 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3408 rhs_code = def_rhs_code;
3409 rhs1 = def_rhs1;
3410 rhs2 = gimple_assign_rhs2 (def_stmt);
3411 trueval = irhs1;
3412 goto do_compare;
3414 else
3415 irhs2 = *defs.get (rhs2);
3416 goto and_ior_xor;
3418 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3419 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3420 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3422 irhs2 = *defs.get (rhs2);
3423 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3424 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3425 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3427 rhs_code = def_rhs_code;
3428 rhs1 = def_rhs1;
3429 rhs2 = gimple_assign_rhs2 (def_stmt);
3430 trueval = irhs2;
3431 goto do_compare;
3433 else
3434 irhs1 = *defs.get (rhs1);
3435 goto and_ior_xor;
3437 /* FALLTHRU */
3438 case BIT_IOR_EXPR:
3439 case BIT_XOR_EXPR:
3440 irhs1 = *defs.get (rhs1);
3441 irhs2 = *defs.get (rhs2);
3442 and_ior_xor:
3443 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3444 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3446 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3447 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3448 int out_prec = TYPE_PRECISION (out_type);
3449 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3450 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3451 stmt_info);
3452 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3453 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3454 stmt_info);
3455 else
3457 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3458 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3461 itype = TREE_TYPE (irhs1);
3462 pattern_stmt
3463 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3464 rhs_code, irhs1, irhs2);
3465 break;
3467 default:
3468 do_compare:
3469 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3470 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3471 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3472 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3473 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3475 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3476 itype
3477 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3479 else
3480 itype = TREE_TYPE (rhs1);
3481 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3482 if (trueval == NULL_TREE)
3483 trueval = build_int_cst (itype, 1);
3484 else
3485 gcc_checking_assert (useless_type_conversion_p (itype,
3486 TREE_TYPE (trueval)));
3487 pattern_stmt
3488 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3489 COND_EXPR, cond_expr, trueval,
3490 build_int_cst (itype, 0));
3491 break;
3494 gimple_set_location (pattern_stmt, loc);
3495 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3496 pattern def seq stmts instead of just letting auto-detection do
3497 its work? */
3498 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3499 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3500 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3501 append_pattern_def_seq (stmt_info, pattern_stmt);
3502 defs.put (var, gimple_assign_lhs (pattern_stmt));
3505 /* Comparison function to qsort a vector of gimple stmts by UID. */
3507 static int
3508 sort_after_uid (const void *p1, const void *p2)
3510 const gimple *stmt1 = *(const gimple * const *)p1;
3511 const gimple *stmt2 = *(const gimple * const *)p2;
3512 return gimple_uid (stmt1) - gimple_uid (stmt2);
3515 /* Create pattern stmts for all stmts participating in the bool pattern
3516 specified by BOOL_STMT_SET and its root STMT with the desired type
3517 OUT_TYPE. Return the def of the pattern root. */
3519 static tree
3520 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3521 tree out_type, gimple *stmt)
3523 /* Gather original stmts in the bool pattern in their order of appearance
3524 in the IL. */
3525 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3526 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3527 i != bool_stmt_set.end (); ++i)
3528 bool_stmts.quick_push (*i);
3529 bool_stmts.qsort (sort_after_uid);
3531 /* Now process them in that order, producing pattern stmts. */
3532 hash_map <tree, tree> defs;
3533 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3534 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3535 out_type, vinfo_for_stmt (stmt), defs);
3537 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3538 gimple *pattern_stmt
3539 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3540 return gimple_assign_lhs (pattern_stmt);
3543 /* Helper for search_type_for_mask. */
3545 static tree
3546 search_type_for_mask_1 (tree var, vec_info *vinfo,
3547 hash_map<gimple *, tree> &cache)
3549 gimple *def_stmt;
3550 enum vect_def_type dt;
3551 tree rhs1;
3552 enum tree_code rhs_code;
3553 tree res = NULL_TREE, res2;
3555 if (TREE_CODE (var) != SSA_NAME)
3556 return NULL_TREE;
3558 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3559 return NULL_TREE;
3561 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3562 return NULL_TREE;
3564 if (dt != vect_internal_def)
3565 return NULL_TREE;
3567 if (!is_gimple_assign (def_stmt))
3568 return NULL_TREE;
3570 tree *c = cache.get (def_stmt);
3571 if (c)
3572 return *c;
3574 rhs_code = gimple_assign_rhs_code (def_stmt);
3575 rhs1 = gimple_assign_rhs1 (def_stmt);
3577 switch (rhs_code)
3579 case SSA_NAME:
3580 case BIT_NOT_EXPR:
3581 CASE_CONVERT:
3582 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3583 break;
3585 case BIT_AND_EXPR:
3586 case BIT_IOR_EXPR:
3587 case BIT_XOR_EXPR:
3588 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3589 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3590 cache);
3591 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3592 res = res2;
3593 break;
3595 default:
3596 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3598 tree comp_vectype, mask_type;
3600 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3602 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3603 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3604 vinfo, cache);
3605 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3606 res = res2;
3607 break;
3610 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3611 if (comp_vectype == NULL_TREE)
3613 res = NULL_TREE;
3614 break;
3617 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3618 if (!mask_type
3619 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3621 res = NULL_TREE;
3622 break;
3625 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3626 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3628 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3629 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3631 else
3632 res = TREE_TYPE (rhs1);
3636 cache.put (def_stmt, res);
3637 return res;
3640 /* Return the proper type for converting bool VAR into
3641 an integer value or NULL_TREE if no such type exists.
3642 The type is chosen so that the converted value has the
3643 same number of elements as VAR's vector type. */
3645 static tree
3646 search_type_for_mask (tree var, vec_info *vinfo)
3648 hash_map<gimple *, tree> cache;
3649 return search_type_for_mask_1 (var, vinfo, cache);
3652 /* Function vect_recog_bool_pattern
3654 Try to find pattern like following:
3656 bool a_b, b_b, c_b, d_b, e_b;
3657 TYPE f_T;
3658 loop:
3659 S1 a_b = x1 CMP1 y1;
3660 S2 b_b = x2 CMP2 y2;
3661 S3 c_b = a_b & b_b;
3662 S4 d_b = x3 CMP3 y3;
3663 S5 e_b = c_b | d_b;
3664 S6 f_T = (TYPE) e_b;
3666 where type 'TYPE' is an integral type. Or a similar pattern
3667 ending in
3669 S6 f_Y = e_b ? r_Y : s_Y;
3671 as results from if-conversion of a complex condition.
3673 Input:
3675 * LAST_STMT: A stmt at the end from which the pattern
3676 search begins, i.e. cast of a bool to
3677 an integer type.
3679 Output:
3681 * TYPE_IN: The type of the input arguments to the pattern.
3683 * TYPE_OUT: The type of the output of this pattern.
3685 * Return value: A new stmt that will be used to replace the pattern.
3687 Assuming the size of TYPE is the same as the size of all comparisons
3688 (otherwise some casts would be added where needed), for the above
3689 sequence we create related pattern stmts:
3690 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3691 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3692 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3693 S5' e_T = c_T | d_T;
3694 S6' f_T = e_T;
3696 Instead of the above S3' we could emit:
3697 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3698 S3' c_T = a_T | b_T;
3699 but the above is more efficient. */
3701 static gimple *
3702 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3703 tree *type_out)
3705 gimple *last_stmt = stmts->pop ();
3706 enum tree_code rhs_code;
3707 tree var, lhs, rhs, vectype;
3708 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3709 stmt_vec_info new_stmt_info;
3710 vec_info *vinfo = stmt_vinfo->vinfo;
3711 gimple *pattern_stmt;
3713 if (!is_gimple_assign (last_stmt))
3714 return NULL;
3716 var = gimple_assign_rhs1 (last_stmt);
3717 lhs = gimple_assign_lhs (last_stmt);
3719 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3720 return NULL;
3722 hash_set<gimple *> bool_stmts;
3724 rhs_code = gimple_assign_rhs_code (last_stmt);
3725 if (CONVERT_EXPR_CODE_P (rhs_code))
3727 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3728 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3729 return NULL;
3730 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3731 if (vectype == NULL_TREE)
3732 return NULL;
3734 if (check_bool_pattern (var, vinfo, bool_stmts))
3736 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3737 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3738 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3739 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3740 else
3741 pattern_stmt
3742 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3744 else
3746 tree type = search_type_for_mask (var, vinfo);
3747 tree cst0, cst1, tmp;
3749 if (!type)
3750 return NULL;
3752 /* We may directly use the COND_EXPR with the narrowed type, avoiding
3753 multiple COND_EXPRs and the subsequent packing of their results,
3754 and perform a single COND_EXPR on a packed mask instead.  In the
3755 widening case it is better to do the COND_EXPR first and then
3756 extract the results. */
3757 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3758 type = TREE_TYPE (lhs);
3760 cst0 = build_int_cst (type, 0);
3761 cst1 = build_int_cst (type, 1);
3762 tmp = vect_recog_temp_ssa_var (type, NULL);
3763 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3765 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3767 tree new_vectype = get_vectype_for_scalar_type (type);
3768 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3769 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3770 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3771 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3773 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3774 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3778 *type_out = vectype;
3779 *type_in = vectype;
3780 stmts->safe_push (last_stmt);
3781 if (dump_enabled_p ())
3782 dump_printf_loc (MSG_NOTE, vect_location,
3783 "vect_recog_bool_pattern: detected:\n");
3785 return pattern_stmt;
3787 else if (rhs_code == COND_EXPR
3788 && TREE_CODE (var) == SSA_NAME)
3790 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3791 if (vectype == NULL_TREE)
3792 return NULL;
3794 /* Build a scalar type for the boolean result that when
3795 vectorized matches the vector type of the result in
3796 size and number of elements. */
3797 unsigned prec
3798 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3799 TYPE_VECTOR_SUBPARTS (vectype));
3801 tree type
3802 = build_nonstandard_integer_type (prec,
3803 TYPE_UNSIGNED (TREE_TYPE (var)));
3804 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3805 return NULL;
3807 if (!check_bool_pattern (var, vinfo, bool_stmts))
3808 return NULL;
3810 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3812 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3813 pattern_stmt
3814 = gimple_build_assign (lhs, COND_EXPR,
3815 build2 (NE_EXPR, boolean_type_node,
3816 rhs, build_int_cst (type, 0)),
3817 gimple_assign_rhs2 (last_stmt),
3818 gimple_assign_rhs3 (last_stmt));
3819 *type_out = vectype;
3820 *type_in = vectype;
3821 stmts->safe_push (last_stmt);
3822 if (dump_enabled_p ())
3823 dump_printf_loc (MSG_NOTE, vect_location,
3824 "vect_recog_bool_pattern: detected:\n");
3826 return pattern_stmt;
3828 else if (rhs_code == SSA_NAME
3829 && STMT_VINFO_DATA_REF (stmt_vinfo))
3831 stmt_vec_info pattern_stmt_info;
3832 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3833 gcc_assert (vectype != NULL_TREE);
3834 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3835 return NULL;
3837 if (check_bool_pattern (var, vinfo, bool_stmts))
3838 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3839 else
3841 tree type = search_type_for_mask (var, vinfo);
3842 tree cst0, cst1, new_vectype;
3844 if (!type)
3845 return NULL;
3847 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3848 type = TREE_TYPE (vectype);
3850 cst0 = build_int_cst (type, 0);
3851 cst1 = build_int_cst (type, 1);
3852 new_vectype = get_vectype_for_scalar_type (type);
3854 rhs = vect_recog_temp_ssa_var (type, NULL);
3855 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3857 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3858 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3859 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3860 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3863 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3864 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3866 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3867 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3868 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3869 rhs = rhs2;
3871 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3872 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3873 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3874 STMT_VINFO_DATA_REF (pattern_stmt_info)
3875 = STMT_VINFO_DATA_REF (stmt_vinfo);
3876 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3877 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3878 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3879 *type_out = vectype;
3880 *type_in = vectype;
3881 stmts->safe_push (last_stmt);
3882 if (dump_enabled_p ())
3883 dump_printf_loc (MSG_NOTE, vect_location,
3884 "vect_recog_bool_pattern: detected:\n");
3885 return pattern_stmt;
3887 else
3888 return NULL;
3892 /* A helper for vect_recog_mask_conversion_pattern. Build
3893 conversion of MASK to a type suitable for masking VECTYPE.
3894 The built statement gets the required vectype and is appended to
3895 STMT_VINFO's pattern definition sequence.
3897 Return the converted mask. */
3899 static tree
3900 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3901 vec_info *vinfo)
3903 gimple *stmt;
3904 tree masktype, tmp;
3905 stmt_vec_info new_stmt_info;
3907 masktype = build_same_sized_truth_vector_type (vectype);
3908 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3909 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3910 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3911 set_vinfo_for_stmt (stmt, new_stmt_info);
3912 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3913 append_pattern_def_seq (stmt_vinfo, stmt);
3915 return tmp;
3919 /* Function vect_recog_mask_conversion_pattern
3921 Try to find statements which require boolean type
3922 conversion. Additional conversion statements are
3923 added to handle such cases. For example:
3925 bool m_1, m_2, m_3;
3926 int i_4, i_5;
3927 double d_6, d_7;
3928 char c_1, c_2, c_3;
3930 S1 m_1 = i_4 > i_5;
3931 S2 m_2 = d_6 < d_7;
3932 S3 m_3 = m_1 & m_2;
3933 S4 c_1 = m_3 ? c_2 : c_3;
3935 Will be transformed into:
3937 S1 m_1 = i_4 > i_5;
3938 S2 m_2 = d_6 < d_7;
3939 S3'' m_2' = (_Bool[bitsize=32])m_2;
3940 S3' m_3' = m_1 & m_2';
3941 S4'' m_3'' = (_Bool[bitsize=8])m_3';
3942 S4' c_1' = m_3'' ? c_2 : c_3; */
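/* A minimal source-level sketch (an illustration only, not part of GCC,
   hence the #if 0 guard; all names are assumed) of a loop whose
   if-converted form matches S1..S4 above: the int and the double
   comparisons produce masks with different element widths, so one
   operand of the & needs a mask conversion.  */
#if 0
void
example_mask_conversion (char *r, int *i4, int *i5, double *d6,
                         double *d7, char *c2, char *c3, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = (i4[i] > i5[i] && d6[i] < d7[i]) ? c2[i] : c3[i];
}
#endif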
3944 static gimple *
3945 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3946 tree *type_out)
3948 gimple *last_stmt = stmts->pop ();
3949 enum tree_code rhs_code;
3950 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3951 tree vectype1, vectype2;
3952 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3953 stmt_vec_info pattern_stmt_info;
3954 vec_info *vinfo = stmt_vinfo->vinfo;
3956 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3957 if (is_gimple_call (last_stmt)
3958 && gimple_call_internal_p (last_stmt)
3959 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3960 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3962 gcall *pattern_stmt;
3963 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3965 if (load)
3967 lhs = gimple_call_lhs (last_stmt);
3968 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3970 else
3972 rhs2 = gimple_call_arg (last_stmt, 3);
3973 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3976 rhs1 = gimple_call_arg (last_stmt, 2);
3977 rhs1_type = search_type_for_mask (rhs1, vinfo);
3978 if (!rhs1_type)
3979 return NULL;
3980 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3982 if (!vectype1 || !vectype2
3983 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3984 TYPE_VECTOR_SUBPARTS (vectype2)))
3985 return NULL;
3987 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3989 if (load)
3991 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3992 pattern_stmt
3993 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3994 gimple_call_arg (last_stmt, 0),
3995 gimple_call_arg (last_stmt, 1),
3996 tmp);
3997 gimple_call_set_lhs (pattern_stmt, lhs);
3999 else
4000 pattern_stmt
4001 = gimple_build_call_internal (IFN_MASK_STORE, 4,
4002 gimple_call_arg (last_stmt, 0),
4003 gimple_call_arg (last_stmt, 1),
4004 tmp,
4005 gimple_call_arg (last_stmt, 3));
4007 gimple_call_set_nothrow (pattern_stmt, true);
4009 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4010 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4011 STMT_VINFO_DATA_REF (pattern_stmt_info)
4012 = STMT_VINFO_DATA_REF (stmt_vinfo);
4013 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4014 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4015 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
4017 *type_out = vectype1;
4018 *type_in = vectype1;
4019 stmts->safe_push (last_stmt);
4020 if (dump_enabled_p ())
4021 dump_printf_loc (MSG_NOTE, vect_location,
4022 "vect_recog_mask_conversion_pattern: detected:\n");
4024 return pattern_stmt;
4027 if (!is_gimple_assign (last_stmt))
4028 return NULL;
4030 gimple *pattern_stmt;
4031 lhs = gimple_assign_lhs (last_stmt);
4032 rhs1 = gimple_assign_rhs1 (last_stmt);
4033 rhs_code = gimple_assign_rhs_code (last_stmt);
4035 /* Check for cond expression requiring mask conversion. */
4036 if (rhs_code == COND_EXPR)
4038 /* vect_recog_mixed_size_cond_pattern could apply.
4039 Do nothing then. */
4040 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4041 return NULL;
4043 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4045 if (TREE_CODE (rhs1) == SSA_NAME)
4047 rhs1_type = search_type_for_mask (rhs1, vinfo);
4048 if (!rhs1_type)
4049 return NULL;
4051 else if (COMPARISON_CLASS_P (rhs1))
4053 /* Check whether we're comparing scalar booleans and (if so)
4054 whether a better mask type exists than the mask associated
4055 with boolean-sized elements. This avoids unnecessary packs
4056 and unpacks if the booleans are set from comparisons of
4057 wider types. E.g. in:
4059 int x1, x2, x3, x4, y1, y2;
4061 bool b1 = (x1 == x2);
4062 bool b2 = (x3 == x4);
4063 ... = b1 == b2 ? y1 : y2;
4065 it is better for b1 and b2 to use the mask type associated
4066 with int elements rather than bool (byte) elements. */
4067 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4068 if (!rhs1_type)
4069 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4071 else
4072 return NULL;
4074 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4076 if (!vectype1 || !vectype2)
4077 return NULL;
4079 /* Continue if a conversion is needed. Also continue if we have
4080 a comparison whose vector type would normally be different from
4081 VECTYPE2 when considered in isolation. In that case we'll
4082 replace the comparison with an SSA name (so that we can record
4083 its vector type) and behave as though the comparison was an SSA
4084 name from the outset. */
4085 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4086 TYPE_VECTOR_SUBPARTS (vectype2))
4087 && (TREE_CODE (rhs1) == SSA_NAME
4088 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4089 return NULL;
4091 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4092 in place; we can handle it in vectorizable_condition. This avoids
4093 unnecessary promotion stmts and an increased vectorization factor. */
4094 if (COMPARISON_CLASS_P (rhs1)
4095 && INTEGRAL_TYPE_P (rhs1_type)
4096 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4097 TYPE_VECTOR_SUBPARTS (vectype2)))
4099 gimple *dummy;
4100 enum vect_def_type dt;
4101 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4102 &dummy, &dt)
4103 && dt == vect_external_def
4104 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4105 &dummy, &dt)
4106 && (dt == vect_external_def
4107 || dt == vect_constant_def))
4109 tree wide_scalar_type = build_nonstandard_integer_type
4110 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4111 TYPE_UNSIGNED (rhs1_type));
4112 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4113 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4114 return NULL;
4118 /* If rhs1 is a comparison we need to move it into a
4119 separate statement. */
4120 if (TREE_CODE (rhs1) != SSA_NAME)
4122 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4123 pattern_stmt = gimple_build_assign (tmp, rhs1);
4124 rhs1 = tmp;
4126 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4127 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4128 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4129 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4132 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4133 TYPE_VECTOR_SUBPARTS (vectype2)))
4134 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4135 else
4136 tmp = rhs1;
4138 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4139 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4140 gimple_assign_rhs2 (last_stmt),
4141 gimple_assign_rhs3 (last_stmt));
4143 *type_out = vectype1;
4144 *type_in = vectype1;
4145 stmts->safe_push (last_stmt);
4146 if (dump_enabled_p ())
4147 dump_printf_loc (MSG_NOTE, vect_location,
4148 "vect_recog_mask_conversion_pattern: detected:\n");
4150 return pattern_stmt;
4153 /* Now check for binary boolean operations requiring conversion for
4154 one of the operands. */
4155 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4156 return NULL;
4158 if (rhs_code != BIT_IOR_EXPR
4159 && rhs_code != BIT_XOR_EXPR
4160 && rhs_code != BIT_AND_EXPR
4161 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4162 return NULL;
4164 rhs2 = gimple_assign_rhs2 (last_stmt);
4166 rhs1_type = search_type_for_mask (rhs1, vinfo);
4167 rhs2_type = search_type_for_mask (rhs2, vinfo);
4169 if (!rhs1_type || !rhs2_type
4170 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4171 return NULL;
4173 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4175 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4176 if (!vectype1)
4177 return NULL;
4178 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4180 else
4182 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4183 if (!vectype1)
4184 return NULL;
4185 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4188 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4189 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4191 *type_out = vectype1;
4192 *type_in = vectype1;
4193 stmts->safe_push (last_stmt);
4194 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_NOTE, vect_location,
4196 "vect_recog_mask_conversion_pattern: detected:\n");
4198 return pattern_stmt;
4201 /* STMT is a load or store. If the load or store is conditional, return
4202 the boolean condition under which it occurs, otherwise return null. */
4204 static tree
4205 vect_get_load_store_mask (gimple *stmt)
4207 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4209 gcc_assert (gimple_assign_single_p (def_assign));
4210 return NULL_TREE;
4213 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4215 internal_fn ifn = gimple_call_internal_fn (def_call);
4216 int mask_index = internal_fn_mask_index (ifn);
4217 return gimple_call_arg (def_call, mask_index);
4220 gcc_unreachable ();
4223 /* Return the scalar offset type that an internal gather/scatter function
4224 should use. GS_INFO describes the gather/scatter operation. */
4226 static tree
4227 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4229 tree offset_type = TREE_TYPE (gs_info->offset);
4230 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4232 /* Enforced by vect_check_gather_scatter. */
4233 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4234 gcc_assert (element_bits >= offset_bits);
4236 /* If the offset is narrower than the elements, extend it according
4237 to its sign. */
4238 if (element_bits > offset_bits)
4239 return build_nonstandard_integer_type (element_bits,
4240 TYPE_UNSIGNED (offset_type));
4242 return offset_type;
4245 /* Return MASK if MASK is suitable for masking an operation on vectors
4246 of type VECTYPE, otherwise convert it into such a form and return
4247 the result. Associate any conversion statements with STMT_INFO's
4248 pattern. */
4250 static tree
4251 vect_convert_mask_for_vectype (tree mask, tree vectype,
4252 stmt_vec_info stmt_info, vec_info *vinfo)
4254 tree mask_type = search_type_for_mask (mask, vinfo);
4255 if (mask_type)
4257 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4258 if (mask_vectype
4259 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4260 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4261 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4263 return mask;
4266 /* Return the equivalent of:
4268 fold_convert (TYPE, VALUE)
4270 with the expectation that the operation will be vectorized.
4271 If new statements are needed, add them as pattern statements
4272 to STMT_INFO. */
4274 static tree
4275 vect_add_conversion_to_patterm (tree type, tree value,
4276 stmt_vec_info stmt_info,
4277 vec_info *vinfo)
4279 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4280 return value;
4282 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4283 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4284 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4285 set_vinfo_for_stmt (conversion, new_stmt_info);
4286 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4287 append_pattern_def_seq (stmt_info, conversion);
4288 return new_value;
4291 /* Try to convert STMT into a call to a gather load or scatter store
4292 internal function. Return the final statement on success and set
4293 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4295 This function only handles gathers and scatters that were recognized
4296 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
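/* A minimal source-level sketch (an illustration only, not part of GCC,
   hence the #if 0 guard; all names are assumed) of a conditional gather
   that this function can rewrite as a call to a masked internal
   gather-load function on targets that provide one.  */
#if 0
void
example_masked_gather (double *out, double *base, int *idx,
                       int *cond, int n)
{
  for (int i = 0; i < n; i++)
    if (cond[i])
      out[i] = base[idx[i]];   /* Indexed load under a loop mask.  */
}
#endif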
4298 static gimple *
4299 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4300 tree *type_in, tree *type_out)
4302 /* Currently we only support this for loop vectorization. */
4303 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4304 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4305 if (!loop_vinfo)
4306 return NULL;
4308 /* Make sure that we're looking at a gather load or scatter store. */
4309 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4310 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4311 return NULL;
4313 /* Get the boolean that controls whether the load or store happens.
4314 This is null if the operation is unconditional. */
4315 tree mask = vect_get_load_store_mask (stmt);
4317 /* Make sure that the target supports an appropriate internal
4318 function for the gather/scatter operation. */
4319 gather_scatter_info gs_info;
4320 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4321 || gs_info.decl)
4322 return NULL;
4324 /* Convert the mask to the right form. */
4325 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4326 if (mask)
4327 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4328 loop_vinfo);
4330 /* Get the invariant base and non-invariant offset, converting the
4331 latter to the same width as the vector elements. */
4332 tree base = gs_info.base;
4333 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4334 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4335 last_stmt_info, loop_vinfo);
4337 /* Build the new pattern statement. */
4338 tree scale = size_int (gs_info.scale);
4339 gcall *pattern_stmt;
4340 if (DR_IS_READ (dr))
4342 if (mask != NULL)
4343 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4344 offset, scale, mask);
4345 else
4346 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4347 offset, scale);
4348 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4349 gimple_call_set_lhs (pattern_stmt, load_lhs);
4351 else
4353 tree rhs = vect_get_store_rhs (stmt);
4354 if (mask != NULL)
4355 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4356 base, offset, scale, rhs,
4357 mask);
4358 else
4359 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4360 base, offset, scale, rhs);
4362 gimple_call_set_nothrow (pattern_stmt, true);
4364 /* Copy across relevant vectorization info and associate DR with the
4365 new pattern statement instead of the original statement. */
4366 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4367 loop_vinfo);
4368 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4369 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4370 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4371 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4372 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4373 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4374 DR_STMT (dr) = pattern_stmt;
4376 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4377 *type_out = vectype;
4378 *type_in = vectype;
4380 if (dump_enabled_p ())
4381 dump_printf_loc (MSG_NOTE, vect_location,
4382 "gather/scatter pattern detected:\n");
4384 return pattern_stmt;
4387 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4389 static gimple *
4390 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4391 tree *type_out)
4393 gimple *last_stmt = stmts->pop ();
4394 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4395 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4396 last_stmt_info,
4397 type_in, type_out);
4398 if (pattern_stmt)
4399 stmts->safe_push (last_stmt);
4400 return pattern_stmt;
4403 /* Mark statements that are involved in a pattern. */
4405 static inline void
4406 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4407 tree pattern_vectype)
4409 stmt_vec_info pattern_stmt_info, def_stmt_info;
4410 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4411 vec_info *vinfo = orig_stmt_info->vinfo;
4412 gimple *def_stmt;
4414 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4415 if (pattern_stmt_info == NULL)
4417 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4418 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4420 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4422 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4423 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4424 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4425 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4426 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4427 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4428 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4429 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4430 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4432 gimple_stmt_iterator si;
4433 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4434 !gsi_end_p (si); gsi_next (&si))
4436 def_stmt = gsi_stmt (si);
4437 def_stmt_info = vinfo_for_stmt (def_stmt);
4438 if (def_stmt_info == NULL)
4440 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4441 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4443 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4444 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4445 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4446 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4447 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4452 /* Function vect_pattern_recog_1
4454 Input:
4455 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4456 computation pattern.
4457 STMT: A stmt from which the pattern search should start.
4459 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4460 expression that computes the same functionality and can be used to
4461 replace the sequence of stmts that are involved in the pattern.
4463 Output:
4464 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4465 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4466 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4467 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4468 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4469 the available target pattern.
4471 This function also does some bookkeeping, as explained in the documentation
4472 for vect_pattern_recog. */
4474 static bool
4475 vect_pattern_recog_1 (vect_recog_func *recog_func,
4476 gimple_stmt_iterator si,
4477 vec<gimple *> *stmts_to_replace)
4479 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4480 stmt_vec_info stmt_info;
4481 loop_vec_info loop_vinfo;
4482 tree pattern_vectype;
4483 tree type_in, type_out;
4484 enum tree_code code;
4485 int i;
4486 gimple *next;
4488 stmts_to_replace->truncate (0);
4489 stmts_to_replace->quick_push (stmt);
4490 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4491 if (!pattern_stmt)
4492 return false;
4494 stmt = stmts_to_replace->last ();
4495 stmt_info = vinfo_for_stmt (stmt);
4496 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4498 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4499 || VECTOR_TYPE_P (type_in))
4501 /* No need to check target support (already checked by the pattern
4502 recognition function). */
4503 pattern_vectype = type_out ? type_out : type_in;
4505 else
4507 /* Check target support */
4508 type_in = get_vectype_for_scalar_type (type_in);
4509 if (!type_in)
4510 return false;
4511 if (type_out)
4512 type_out = get_vectype_for_scalar_type (type_out);
4513 else
4514 type_out = type_in;
4515 if (!type_out)
4516 return false;
4517 pattern_vectype = type_out;
4519 if (is_gimple_assign (pattern_stmt))
4521 enum insn_code icode;
4522 code = gimple_assign_rhs_code (pattern_stmt);
4523 optab optab = optab_for_tree_code (code, type_in, optab_default);
4524 machine_mode vec_mode = TYPE_MODE (type_in);
4525 if (!optab
4526 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4527 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4528 return false;
4530 else
4531 gcc_assert (is_gimple_call (pattern_stmt));
4534 /* Found a vectorizable pattern. */
4535 if (dump_enabled_p ())
4537 dump_printf_loc (MSG_NOTE, vect_location,
4538 "%s pattern recognized: ", recog_func->name);
4539 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4542 /* Mark the stmts that are involved in the pattern. */
4543 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4545 /* Patterns cannot be vectorized using SLP, because they change the order of
4546 computation. */
4547 if (loop_vinfo)
4548 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4549 if (next == stmt)
4550 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
4552 /* It is possible that additional pattern stmts are created and inserted in
4553 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4554 relevant statements. */
4555 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4556 && (unsigned) i < (stmts_to_replace->length () - 1);
4557 i++)
4559 stmt_info = vinfo_for_stmt (stmt);
4560 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4561 if (dump_enabled_p ())
4563 dump_printf_loc (MSG_NOTE, vect_location,
4564 "additional pattern stmt: ");
4565 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4568 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4571 return true;
4575 /* Function vect_pattern_recog
4577 Input:
4578 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4579 computation idioms.
4581 Output - for each computation idiom that is detected we create a new stmt
4582 that provides the same functionality and that can be vectorized. We
4583 also record some information in the struct_stmt_info of the relevant
4584 stmts, as explained below:
4586 At the entry to this function we have the following stmts, with the
4587 following initial value in the STMT_VINFO fields:
4589 stmt in_pattern_p related_stmt vec_stmt
4590 S1: a_i = .... - - -
4591 S2: a_2 = ..use(a_i).. - - -
4592 S3: a_1 = ..use(a_2).. - - -
4593 S4: a_0 = ..use(a_1).. - - -
4594 S5: ... = ..use(a_0).. - - -
4596 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4597 represented by a single stmt. We then:
4598 - create a new stmt S6 equivalent to the pattern (the stmt is not
4599 inserted into the code)
4600 - fill in the STMT_VINFO fields as follows:
4602 in_pattern_p related_stmt vec_stmt
4603 S1: a_i = .... - - -
4604 S2: a_2 = ..use(a_i).. - - -
4605 S3: a_1 = ..use(a_2).. - - -
4606 S4: a_0 = ..use(a_1).. true S6 -
4607 '---> S6: a_new = .... - S4 -
4608 S5: ... = ..use(a_0).. - - -
4610 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4611 to each other through the RELATED_STMT field).
4613 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4614 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4615 remain irrelevant unless used by stmts other than S4.
4617 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4618 (because they are marked as irrelevant). It will vectorize S6, and record
4619 a pointer to the new vector stmt VS6 from S6 (as usual).
4620 S4 will be skipped, and S5 will be vectorized as usual:
4622 in_pattern_p related_stmt vec_stmt
4623 S1: a_i = .... - - -
4624 S2: a_2 = ..use(a_i).. - - -
4625 S3: a_1 = ..use(a_2).. - - -
4626 > VS6: va_new = .... - - -
4627 S4: a_0 = ..use(a_1).. true S6 VS6
4628 '---> S6: a_new = .... - S4 VS6
4629 > VS5: ... = ..vuse(va_new).. - - -
4630 S5: ... = ..use(a_0).. - - -
4632 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4633 elsewhere), and we'll end up with:
4635 VS6: va_new = ....
4636 VS5: ... = ..vuse(va_new)..
4638 In case of more than one pattern statements, e.g., widen-mult with
4639 intermediate type:
4641 S1 a_t = ;
4642 S2 a_T = (TYPE) a_t;
4643 '--> S3: a_it = (interm_type) a_t;
4644 S4 prod_T = a_T * CONST;
4645 '--> S5: prod_T' = a_it w* CONST;
4647 there may be other users of a_T outside the pattern. In that case S2 will
4648 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4649 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4650 be recorded in S3. */
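/* A minimal source-level sketch (an illustration only, not part of GCC,
   hence the #if 0 guard; all names are assumed) of the widen-mult case
   above, where a_t is a char value, TYPE is long and the vectorizer may
   introduce an intermediate int type (S3/S5).  */
#if 0
void
example_widen_mult (long *out, signed char *a, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (long) a[i] * 10;   /* S2 widens a_t, S4 multiplies by CONST.  */
}
#endif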
4652 void
4653 vect_pattern_recog (vec_info *vinfo)
4655 struct loop *loop;
4656 basic_block *bbs;
4657 unsigned int nbbs;
4658 gimple_stmt_iterator si;
4659 unsigned int i, j;
4660 auto_vec<gimple *, 1> stmts_to_replace;
4661 gimple *stmt;
4663 if (dump_enabled_p ())
4664 dump_printf_loc (MSG_NOTE, vect_location,
4665 "=== vect_pattern_recog ===\n");
4667 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4669 loop = LOOP_VINFO_LOOP (loop_vinfo);
4670 bbs = LOOP_VINFO_BBS (loop_vinfo);
4671 nbbs = loop->num_nodes;
4673 /* Scan through the loop stmts, applying the pattern recognition
4674 functions starting at each stmt visited: */
4675 for (i = 0; i < nbbs; i++)
4677 basic_block bb = bbs[i];
4678 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4680 /* Scan over all generic vect_recog_xxx_pattern functions. */
4681 for (j = 0; j < NUM_PATTERNS; j++)
4682 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4683 &stmts_to_replace))
4684 break;
4688 else
4690 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4691 for (si = bb_vinfo->region_begin;
4692 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4694 if ((stmt = gsi_stmt (si))
4695 && vinfo_for_stmt (stmt)
4696 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4697 continue;
4699 /* Scan over all generic vect_recog_xxx_pattern functions. */
4700 for (j = 0; j < NUM_PATTERNS; j++)
4701 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4702 &stmts_to_replace))
4703 break;