[official-gcc.git] / gcc / tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
45 /* Pattern recognition functions */
46 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
47 tree *);
48 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
49 tree *);
50 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51 tree *);
52 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
53 tree *);
54 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
55 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
58 tree *, tree *);
59 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
61 tree *, tree *);
62 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
63 tree *, tree *);
65 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
66 tree *, tree *);
68 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69 tree *, tree *);
70 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
71 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
73 struct vect_recog_func
75 vect_recog_func_ptr fn;
76 const char *name;
79 /* Note that ordering matters - the first pattern matching on a stmt
80 is taken, which means the more complex pattern usually needs to precede
81 the less complex ones (widen_sum only after dot_prod or sad, for example). */
82 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
83 { vect_recog_widen_mult_pattern, "widen_mult" },
84 { vect_recog_dot_prod_pattern, "dot_prod" },
85 { vect_recog_sad_pattern, "sad" },
86 { vect_recog_widen_sum_pattern, "widen_sum" },
87 { vect_recog_pow_pattern, "pow" },
88 { vect_recog_widen_shift_pattern, "widen_shift" },
89 { vect_recog_over_widening_pattern, "over_widening" },
90 { vect_recog_rotate_pattern, "rotate" },
91 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
92 { vect_recog_divmod_pattern, "divmod" },
93 { vect_recog_mult_pattern, "mult" },
94 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
95 { vect_recog_bool_pattern, "bool" },
96 { vect_recog_mask_conversion_pattern, "mask_conversion" }
99 static inline void
100 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
102 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
103 stmt);
106 static inline void
107 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
109 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
110 append_pattern_def_seq (stmt_info, stmt);
113 /* Check whether STMT2 is in the same loop or basic block as STMT1.
114 Which of the two applies depends on whether we're currently doing
115 loop-based or basic-block-based vectorization, as determined by
116 the vinfo_for_stmt for STMT1 (which must be defined).
118 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
119 to be defined as well. */
121 static bool
122 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
124 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
125 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
128 /* If the LHS of DEF_STMT has a single use, and that statement is
129 in the same loop or basic block, return it. */
131 static gimple *
132 vect_single_imm_use (gimple *def_stmt)
134 tree lhs = gimple_assign_lhs (def_stmt);
135 use_operand_p use_p;
136 gimple *use_stmt;
138 if (!single_imm_use (lhs, &use_p, &use_stmt))
139 return NULL;
141 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
142 return NULL;
144 return use_stmt;
147 /* Check whether NAME, an ssa-name used in USE_STMT,
148 is a result of a type promotion, such that:
149 DEF_STMT: NAME = NOP (name0)
150 If CHECK_SIGN is TRUE, check that either both types are signed or both are
151 unsigned. */
153 static bool
154 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
155 tree *orig_type, gimple **def_stmt, bool *promotion)
157 gimple *dummy_gimple;
158 stmt_vec_info stmt_vinfo;
159 tree type = TREE_TYPE (name);
160 tree oprnd0;
161 enum vect_def_type dt;
163 stmt_vinfo = vinfo_for_stmt (use_stmt);
164 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
165 return false;
167 if (dt != vect_internal_def
168 && dt != vect_external_def && dt != vect_constant_def)
169 return false;
171 if (!*def_stmt)
172 return false;
174 if (!is_gimple_assign (*def_stmt))
175 return false;
177 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
178 return false;
180 oprnd0 = gimple_assign_rhs1 (*def_stmt);
182 *orig_type = TREE_TYPE (oprnd0);
183 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
184 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
185 return false;
187 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
188 *promotion = true;
189 else
190 *promotion = false;
192 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
193 return false;
195 return true;
198 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
199 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
201 static tree
202 vect_recog_temp_ssa_var (tree type, gimple *stmt)
204 return make_temp_ssa_name (type, stmt, "patt");
207 /* Function vect_recog_dot_prod_pattern
209 Try to find the following pattern:
211 type x_t, y_t;
212 TYPE1 prod;
213 TYPE2 sum = init;
214 loop:
215 sum_0 = phi <init, sum_1>
216 S1 x_t = ...
217 S2 y_t = ...
218 S3 x_T = (TYPE1) x_t;
219 S4 y_T = (TYPE1) y_t;
220 S5 prod = x_T * y_T;
221 [S6 prod = (TYPE2) prod; #optional]
222 S7 sum_1 = prod + sum_0;
224 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
225 same size as 'TYPE1' or bigger. This is a special case of a reduction
226 computation.
228 Input:
230 * STMTS: Contains a stmt from which the pattern search begins. In the
231 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
232 will be detected.
234 Output:
236 * TYPE_IN: The type of the input arguments to the pattern.
238 * TYPE_OUT: The type of the output of this pattern.
240 * Return value: A new stmt that will be used to replace the sequence of
241 stmts that constitute the pattern. In this case it will be:
242 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
244 Note: The dot-prod idiom is a widening reduction pattern that is
245 vectorized without preserving all the intermediate results. It
246 produces only N/2 (widened) results (by summing up pairs of
247 intermediate results) rather than all N results. Therefore, we
248 cannot allow this pattern when we want to get all the results and in
249 the correct order (as is the case when this computation is in an
250 inner-loop nested in an outer-loop that is being vectorized). */
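/* A hypothetical scalar source loop (illustrative sketch only, not part
   of GCC) that this recognizer matches, assuming 16-bit short and
   32-bit int, i.e. 'type' = short and TYPE1 = TYPE2 = int.  Kept under
   #if 0 so it is never compiled.  */
#if 0
static int
dot_prod_example (const short *x, const short *y, int n)
{
  int sum = 0;				/* TYPE2 sum = init;  */
  for (int i = 0; i < n; i++)
    /* S3,S4: widen each element; S5: multiply; S7: accumulate.
       The loop body becomes DOT_PROD_EXPR <x_t, y_t, sum_0>.  */
    sum += (int) x[i] * (int) y[i];
  return sum;
}
#endif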
252 static gimple *
253 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
254 tree *type_out)
256 gimple *stmt, *last_stmt = (*stmts)[0];
257 tree oprnd0, oprnd1;
258 tree oprnd00, oprnd01;
259 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
260 tree type, half_type;
261 gimple *pattern_stmt;
262 tree prod_type;
263 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
264 struct loop *loop;
265 tree var;
266 bool promotion;
268 if (!loop_info)
269 return NULL;
271 loop = LOOP_VINFO_LOOP (loop_info);
273 /* We don't allow changing the order of the computation in the inner-loop
274 when doing outer-loop vectorization. */
275 if (loop && nested_in_vect_loop_p (loop, last_stmt))
276 return NULL;
278 if (!is_gimple_assign (last_stmt))
279 return NULL;
281 type = gimple_expr_type (last_stmt);
283 /* Look for the following pattern
284 DX = (TYPE1) X;
285 DY = (TYPE1) Y;
286 DPROD = DX * DY;
287 DDPROD = (TYPE2) DPROD;
288 sum_1 = DDPROD + sum_0;
289 In which
290 - DX is double the size of X
291 - DY is double the size of Y
292 - DX, DY, DPROD all have the same type
293 - sum is the same size as DPROD or bigger
294 - sum has been recognized as a reduction variable.
296 This is equivalent to:
297 DPROD = X w* Y; #widen mult
298 sum_1 = DPROD w+ sum_0; #widen summation
299 or
300 DPROD = X w* Y; #widen mult
301 sum_1 = DPROD + sum_0; #summation
302 */
304 /* Starting from LAST_STMT, follow the defs of its uses in search
305 of the above pattern. */
307 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
308 return NULL;
310 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
312 /* Has been detected as widening-summation? */
314 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
315 type = gimple_expr_type (stmt);
316 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
317 return NULL;
318 oprnd0 = gimple_assign_rhs1 (stmt);
319 oprnd1 = gimple_assign_rhs2 (stmt);
320 half_type = TREE_TYPE (oprnd0);
322 else
324 gimple *def_stmt;
326 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
327 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
328 return NULL;
329 oprnd0 = gimple_assign_rhs1 (last_stmt);
330 oprnd1 = gimple_assign_rhs2 (last_stmt);
331 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
332 || !types_compatible_p (TREE_TYPE (oprnd1), type))
333 return NULL;
334 stmt = last_stmt;
336 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
337 &promotion)
338 && promotion)
340 stmt = def_stmt;
341 oprnd0 = gimple_assign_rhs1 (stmt);
343 else
344 half_type = type;
347 /* So far so good. Since last_stmt was detected as a (summation) reduction,
348 we know that oprnd1 is the reduction variable (defined by a loop-header
349 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
350 Left to check that oprnd0 is defined by a (widen_)mult_expr */
351 if (TREE_CODE (oprnd0) != SSA_NAME)
352 return NULL;
354 prod_type = half_type;
355 stmt = SSA_NAME_DEF_STMT (oprnd0);
357 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
358 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
359 return NULL;
361 /* FORNOW. We cannot yet continue analyzing the def-use chain when this
362 stmt is a phi inside the loop (in case we are analyzing an outer-loop). */
363 if (!is_gimple_assign (stmt))
364 return NULL;
365 stmt_vinfo = vinfo_for_stmt (stmt);
366 gcc_assert (stmt_vinfo);
367 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
368 return NULL;
369 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
370 return NULL;
371 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
373 /* Has been detected as a widening multiplication? */
375 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
376 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
377 return NULL;
378 stmt_vinfo = vinfo_for_stmt (stmt);
379 gcc_assert (stmt_vinfo);
380 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
381 oprnd00 = gimple_assign_rhs1 (stmt);
382 oprnd01 = gimple_assign_rhs2 (stmt);
383 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
384 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
386 else
388 tree half_type0, half_type1;
389 gimple *def_stmt;
390 tree oprnd0, oprnd1;
392 oprnd0 = gimple_assign_rhs1 (stmt);
393 oprnd1 = gimple_assign_rhs2 (stmt);
394 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
395 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
396 return NULL;
397 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
398 &promotion)
399 || !promotion)
400 return NULL;
401 oprnd00 = gimple_assign_rhs1 (def_stmt);
402 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
403 &promotion)
404 || !promotion)
405 return NULL;
406 oprnd01 = gimple_assign_rhs1 (def_stmt);
407 if (!types_compatible_p (half_type0, half_type1))
408 return NULL;
409 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
410 return NULL;
413 half_type = TREE_TYPE (oprnd00);
414 *type_in = half_type;
415 *type_out = type;
417 /* Pattern detected. Create a stmt to be used to replace the pattern: */
418 var = vect_recog_temp_ssa_var (type, NULL);
419 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
420 oprnd00, oprnd01, oprnd1);
422 if (dump_enabled_p ())
424 dump_printf_loc (MSG_NOTE, vect_location,
425 "vect_recog_dot_prod_pattern: detected: ");
426 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
427 dump_printf (MSG_NOTE, "\n");
430 return pattern_stmt;
434 /* Function vect_recog_sad_pattern
436 Try to find the following Sum of Absolute Difference (SAD) pattern:
438 type x_t, y_t;
439 signed TYPE1 diff, abs_diff;
440 TYPE2 sum = init;
441 loop:
442 sum_0 = phi <init, sum_1>
443 S1 x_t = ...
444 S2 y_t = ...
445 S3 x_T = (TYPE1) x_t;
446 S4 y_T = (TYPE1) y_t;
447 S5 diff = x_T - y_T;
448 S6 abs_diff = ABS_EXPR <diff>;
449 [S7 abs_diff = (TYPE2) abs_diff; #optional]
450 S8 sum_1 = abs_diff + sum_0;
452 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
453 same size as 'TYPE1' or bigger. This is a special case of a reduction
454 computation.
456 Input:
458 * STMTS: Contains a stmt from which the pattern search begins. In the
459 example, when this function is called with S8, the pattern
460 {S3,S4,S5,S6,S7,S8} will be detected.
462 Output:
464 * TYPE_IN: The type of the input arguments to the pattern.
466 * TYPE_OUT: The type of the output of this pattern.
468 * Return value: A new stmt that will be used to replace the sequence of
469 stmts that constitute the pattern. In this case it will be:
470 SAD_EXPR <x_t, y_t, sum_0>
471 */
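/* A hypothetical scalar SAD loop (illustrative sketch only, not part of
   GCC) matching the pattern above, assuming 8-bit unsigned char and
   32-bit int, i.e. 'type' = unsigned char and TYPE1 = TYPE2 = int.
   Kept under #if 0 so it is never compiled.  */
#if 0
static int
sad_example (const unsigned char *x, const unsigned char *y, int n)
{
  int sum = 0;				/* TYPE2 sum = init;  */
  for (int i = 0; i < n; i++)
    /* S3,S4: widen; S5: subtract; S6: ABS_EXPR; S8: accumulate.
       The loop body becomes SAD_EXPR <x_t, y_t, sum_0>.  */
    sum += __builtin_abs ((int) x[i] - (int) y[i]);
  return sum;
}
#endif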
473 static gimple *
474 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
475 tree *type_out)
477 gimple *last_stmt = (*stmts)[0];
478 tree sad_oprnd0, sad_oprnd1;
479 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
480 tree half_type;
481 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
482 struct loop *loop;
483 bool promotion;
485 if (!loop_info)
486 return NULL;
488 loop = LOOP_VINFO_LOOP (loop_info);
490 /* We don't allow changing the order of the computation in the inner-loop
491 when doing outer-loop vectorization. */
492 if (loop && nested_in_vect_loop_p (loop, last_stmt))
493 return NULL;
495 if (!is_gimple_assign (last_stmt))
496 return NULL;
498 tree sum_type = gimple_expr_type (last_stmt);
500 /* Look for the following pattern
501 DX = (TYPE1) X;
502 DY = (TYPE1) Y;
503 DDIFF = DX - DY;
504 DAD = ABS_EXPR <DDIFF>;
505 DAD = (TYPE2) DAD; #optional
506 sum_1 = DAD + sum_0;
507 In which
508 - DX is at least double the size of X
509 - DY is at least double the size of Y
510 - DX, DY, DDIFF, DAD all have the same type
511 - sum is the same size as DAD or bigger
512 - sum has been recognized as a reduction variable.
514 This is equivalent to:
515 DDIFF = X w- Y; #widen sub
516 DAD = ABS_EXPR <DDIFF>;
517 sum_1 = DAD w+ sum_0; #widen summation
518 or
519 DDIFF = X w- Y; #widen sub
520 DAD = ABS_EXPR <DDIFF>;
521 sum_1 = DAD + sum_0; #summation
522 */
524 /* Starting from LAST_STMT, follow the defs of its uses in search
525 of the above pattern. */
527 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
528 return NULL;
530 tree plus_oprnd0, plus_oprnd1;
532 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
534 /* Has been detected as widening-summation? */
536 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
537 sum_type = gimple_expr_type (stmt);
538 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
539 return NULL;
540 plus_oprnd0 = gimple_assign_rhs1 (stmt);
541 plus_oprnd1 = gimple_assign_rhs2 (stmt);
542 half_type = TREE_TYPE (plus_oprnd0);
544 else
546 gimple *def_stmt;
548 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
549 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
550 return NULL;
551 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
552 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
553 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
554 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
555 return NULL;
557 /* The type conversion could be promotion, demotion,
558 or just signed -> unsigned. */
559 if (type_conversion_p (plus_oprnd0, last_stmt, false,
560 &half_type, &def_stmt, &promotion))
561 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
562 else
563 half_type = sum_type;
566 /* So far so good. Since last_stmt was detected as a (summation) reduction,
567 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
568 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
569 Then check that plus_oprnd0 is defined by an abs_expr. */
571 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
572 return NULL;
574 tree abs_type = half_type;
575 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
577 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
578 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
579 return NULL;
581 /* FORNOW. We cannot yet continue analyzing the def-use chain when this
582 stmt is a phi inside the loop (in case we are analyzing an outer-loop). */
583 if (!is_gimple_assign (abs_stmt))
584 return NULL;
586 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
587 gcc_assert (abs_stmt_vinfo);
588 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
589 return NULL;
590 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
591 return NULL;
593 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
594 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
595 return NULL;
596 if (TYPE_UNSIGNED (abs_type))
597 return NULL;
599 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
601 if (TREE_CODE (abs_oprnd) != SSA_NAME)
602 return NULL;
604 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
606 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
607 if (!gimple_bb (diff_stmt)
608 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
609 return NULL;
611 /* FORNOW. We cannot yet continue analyzing the def-use chain when this
612 stmt is a phi inside the loop (in case we are analyzing an outer-loop). */
613 if (!is_gimple_assign (diff_stmt))
614 return NULL;
616 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
617 gcc_assert (diff_stmt_vinfo);
618 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
619 return NULL;
620 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
621 return NULL;
623 tree half_type0, half_type1;
624 gimple *def_stmt;
626 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
627 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
629 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
630 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
631 return NULL;
632 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
633 &half_type0, &def_stmt, &promotion)
634 || !promotion)
635 return NULL;
636 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
638 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
639 &half_type1, &def_stmt, &promotion)
640 || !promotion)
641 return NULL;
642 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
644 if (!types_compatible_p (half_type0, half_type1))
645 return NULL;
646 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
647 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
648 return NULL;
650 *type_in = TREE_TYPE (sad_oprnd0);
651 *type_out = sum_type;
653 /* Pattern detected. Create a stmt to be used to replace the pattern: */
654 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
655 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
656 sad_oprnd1, plus_oprnd1);
658 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location,
661 "vect_recog_sad_pattern: detected: ");
662 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
663 dump_printf (MSG_NOTE, "\n");
666 return pattern_stmt;
670 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
671 and LSHIFT_EXPR.
673 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
674 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
676 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
677 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
678 that satisfies the above restrictions, we can perform a widening operation
679 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
680 with a_it = (interm_type) a_t; Store such an operation in *WSTMT. */
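/* A hypothetical source fragment (illustrative sketch only, not part of
   GCC) for the intermediate-type case described above: 'type' = signed
   char and TYPE = int (four times wider).  The constant 200 does not
   fit signed char but fits the 16-bit intermediate type, so the
   promotion is rewritten as signed char -> short and the multiplication
   is widened from short to int.  Kept under #if 0 so it is never
   compiled.  */
#if 0
static void
widen_by_const_example (const signed char *a, int *b, int n)
{
  for (int i = 0; i < n; i++)
    b[i] = a[i] * 200;		/* a_T = (int) a_t;  prod_T = a_T * 200;  */
}
#endif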
682 static bool
683 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
684 tree const_oprnd, tree *oprnd,
685 gimple **wstmt, tree type,
686 tree *half_type, gimple *def_stmt)
688 tree new_type, new_oprnd;
690 if (code != MULT_EXPR && code != LSHIFT_EXPR)
691 return false;
693 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
694 || (code == LSHIFT_EXPR
695 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
696 != 1))
697 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
699 /* CONST_OPRND is a constant of HALF_TYPE. */
700 *oprnd = gimple_assign_rhs1 (def_stmt);
701 return true;
704 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
705 return false;
707 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
708 return false;
710 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
711 a type 2 times bigger than HALF_TYPE. */
712 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
713 TYPE_UNSIGNED (type));
714 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
715 || (code == LSHIFT_EXPR
716 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
717 return false;
719 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
720 *oprnd = gimple_assign_rhs1 (def_stmt);
721 new_oprnd = make_ssa_name (new_type);
722 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
723 *oprnd = new_oprnd;
725 *half_type = new_type;
726 return true;
730 /* Function vect_recog_widen_mult_pattern
732 Try to find the following pattern:
734 type1 a_t;
735 type2 b_t;
736 TYPE a_T, b_T, prod_T;
738 S1 a_t = ;
739 S2 b_t = ;
740 S3 a_T = (TYPE) a_t;
741 S4 b_T = (TYPE) b_t;
742 S5 prod_T = a_T * b_T;
744 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
746 Also detect unsigned cases:
748 unsigned type1 a_t;
749 unsigned type2 b_t;
750 unsigned TYPE u_prod_T;
751 TYPE a_T, b_T, prod_T;
753 S1 a_t = ;
754 S2 b_t = ;
755 S3 a_T = (TYPE) a_t;
756 S4 b_T = (TYPE) b_t;
757 S5 prod_T = a_T * b_T;
758 S6 u_prod_T = (unsigned TYPE) prod_T;
760 and multiplication by constants:
762 type a_t;
763 TYPE a_T, prod_T;
765 S1 a_t = ;
766 S3 a_T = (TYPE) a_t;
767 S5 prod_T = a_T * CONST;
769 A special case of multiplication by constants is when 'TYPE' is 4 times
770 bigger than 'type', but CONST fits an intermediate type 2 times smaller
771 than 'TYPE'. In that case we create an additional pattern stmt for S3
772 to create a variable of the intermediate type, and perform widen-mult
773 on the intermediate type as well:
775 type a_t;
776 interm_type a_it;
777 TYPE a_T, prod_T, prod_T';
779 S1 a_t = ;
780 S3 a_T = (TYPE) a_t;
781 '--> a_it = (interm_type) a_t;
782 S5 prod_T = a_T * CONST;
783 '--> prod_T' = a_it w* CONST;
785 Input/Output:
787 * STMTS: Contains a stmt from which the pattern search begins. In the
788 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
789 is detected. In case of unsigned widen-mult, the original stmt (S5) is
790 replaced with S6 in STMTS. In case of multiplication by a constant
791 of an intermediate type (the last case above), STMTS also contains S3
792 (inserted before S5).
794 Output:
796 * TYPE_IN: The type of the input arguments to the pattern.
798 * TYPE_OUT: The type of the output of this pattern.
800 * Return value: A new stmt that will be used to replace the sequence of
801 stmts that constitute the pattern. In this case it will be:
802 WIDEN_MULT <a_t, b_t>
803 If the result of WIDEN_MULT needs to be converted to a larger type, the
804 returned stmt will be this type conversion stmt.
805 */
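/* A hypothetical scalar loop (illustrative sketch only, not part of
   GCC) matching the basic case above, assuming 16-bit short and 32-bit
   int.  Kept under #if 0 so it is never compiled.  */
#if 0
static void
widen_mult_example (const short *a, const short *b, int *c, int n)
{
  for (int i = 0; i < n; i++)
    /* S3,S4: promotions; S5: multiply; becomes WIDEN_MULT <a_t, b_t>.  */
    c[i] = (int) a[i] * (int) b[i];
}
#endif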
807 static gimple *
808 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
809 tree *type_in, tree *type_out)
811 gimple *last_stmt = stmts->pop ();
812 gimple *def_stmt0, *def_stmt1;
813 tree oprnd0, oprnd1;
814 tree type, half_type0, half_type1;
815 gimple *new_stmt = NULL, *pattern_stmt = NULL;
816 tree vectype, vecitype;
817 tree var;
818 enum tree_code dummy_code;
819 int dummy_int;
820 vec<tree> dummy_vec;
821 bool op1_ok;
822 bool promotion;
824 if (!is_gimple_assign (last_stmt))
825 return NULL;
827 type = gimple_expr_type (last_stmt);
829 /* Starting from LAST_STMT, follow the defs of its uses in search
830 of the above pattern. */
832 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
833 return NULL;
835 oprnd0 = gimple_assign_rhs1 (last_stmt);
836 oprnd1 = gimple_assign_rhs2 (last_stmt);
837 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
838 || !types_compatible_p (TREE_TYPE (oprnd1), type))
839 return NULL;
841 /* Check argument 0. */
842 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
843 &promotion)
844 || !promotion)
845 return NULL;
846 /* Check argument 1. */
847 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
848 &def_stmt1, &promotion);
850 if (op1_ok && promotion)
852 oprnd0 = gimple_assign_rhs1 (def_stmt0);
853 oprnd1 = gimple_assign_rhs1 (def_stmt1);
855 else
857 if (TREE_CODE (oprnd1) == INTEGER_CST
858 && TREE_CODE (half_type0) == INTEGER_TYPE
859 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
860 &oprnd0, &new_stmt, type,
861 &half_type0, def_stmt0))
863 half_type1 = half_type0;
864 oprnd1 = fold_convert (half_type1, oprnd1);
866 else
867 return NULL;
870 /* If the two arguments have different sizes, convert the one with
871 the smaller type into the larger type. */
872 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
874 /* If we already used up the single-stmt slot, give up. */
875 if (new_stmt)
876 return NULL;
878 tree* oprnd = NULL;
879 gimple *def_stmt = NULL;
881 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
883 def_stmt = def_stmt0;
884 half_type0 = half_type1;
885 oprnd = &oprnd0;
887 else
889 def_stmt = def_stmt1;
890 half_type1 = half_type0;
891 oprnd = &oprnd1;
894 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
895 tree new_oprnd = make_ssa_name (half_type0);
896 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
897 *oprnd = new_oprnd;
900 /* Handle unsigned case. Look for
901 S6 u_prod_T = (unsigned TYPE) prod_T;
902 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
903 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
905 gimple *use_stmt;
906 tree use_lhs;
907 tree use_type;
909 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
910 return NULL;
912 use_stmt = vect_single_imm_use (last_stmt);
913 if (!use_stmt || !is_gimple_assign (use_stmt)
914 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
915 return NULL;
917 use_lhs = gimple_assign_lhs (use_stmt);
918 use_type = TREE_TYPE (use_lhs);
919 if (!INTEGRAL_TYPE_P (use_type)
920 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
921 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
922 return NULL;
924 type = use_type;
925 last_stmt = use_stmt;
928 if (!types_compatible_p (half_type0, half_type1))
929 return NULL;
931 /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
932 to get an intermediate result of type ITYPE. In this case we need
933 to build a statement to convert this intermediate result to type TYPE. */
934 tree itype = type;
935 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
936 itype = build_nonstandard_integer_type
937 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
938 TYPE_UNSIGNED (type));
940 /* Pattern detected. */
941 if (dump_enabled_p ())
942 dump_printf_loc (MSG_NOTE, vect_location,
943 "vect_recog_widen_mult_pattern: detected:\n");
945 /* Check target support */
946 vectype = get_vectype_for_scalar_type (half_type0);
947 vecitype = get_vectype_for_scalar_type (itype);
948 if (!vectype
949 || !vecitype
950 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
951 vecitype, vectype,
952 &dummy_code, &dummy_code,
953 &dummy_int, &dummy_vec))
954 return NULL;
956 *type_in = vectype;
957 *type_out = get_vectype_for_scalar_type (type);
959 /* Pattern supported. Create a stmt to be used to replace the pattern: */
960 var = vect_recog_temp_ssa_var (itype, NULL);
961 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
963 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
964 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
966 /* If the original two operands have different sizes, we may need to convert
967 the smaller one into the larger type. If this is the case, at this point
968 the new stmt is already built. */
969 if (new_stmt)
971 append_pattern_def_seq (stmt_vinfo, new_stmt);
972 stmt_vec_info new_stmt_info
973 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
974 set_vinfo_for_stmt (new_stmt, new_stmt_info);
975 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
978 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
979 the result of the widen-mult operation into type TYPE. */
980 if (itype != type)
982 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
983 stmt_vec_info pattern_stmt_info
984 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
985 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
986 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
987 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
988 NOP_EXPR,
989 gimple_assign_lhs (pattern_stmt));
992 if (dump_enabled_p ())
993 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
995 stmts->safe_push (last_stmt);
996 return pattern_stmt;
1000 /* Function vect_recog_pow_pattern
1002 Try to find the following pattern:
1004 x = POW (y, N);
1006 with POW being one of pow, powf, powi, powif and N being
1007 either 2 or 0.5.
1009 Input:
1011 * LAST_STMT: A stmt from which the pattern search begins.
1013 Output:
1015 * TYPE_IN: The type of the input arguments to the pattern.
1017 * TYPE_OUT: The type of the output of this pattern.
1019 * Return value: A new stmt that will be used to replace the sequence of
1020 stmts that constitute the pattern. In this case it will be:
1021 x = x * x
1022 or
1023 x = sqrt (x)
1024 */
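/* A hypothetical caller (illustrative sketch only, not part of GCC)
   showing both replacements described above.  Kept under #if 0 so it is
   never compiled.  */
#if 0
static void
pow_example (double *x, int n)
{
  for (int i = 0; i < n; i++)
    x[i] = __builtin_pow (x[i], 2.0);	/* Rewritten as x[i] * x[i].  */
  for (int i = 0; i < n; i++)
    x[i] = __builtin_pow (x[i], 0.5);	/* Rewritten as sqrt (x[i]) when
					   IFN_SQRT is supported.  */
}
#endif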
1026 static gimple *
1027 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1028 tree *type_out)
1030 gimple *last_stmt = (*stmts)[0];
1031 tree base, exp = NULL;
1032 gimple *stmt;
1033 tree var;
1035 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1036 return NULL;
1038 switch (gimple_call_combined_fn (last_stmt))
1040 CASE_CFN_POW:
1041 CASE_CFN_POWI:
1042 base = gimple_call_arg (last_stmt, 0);
1043 exp = gimple_call_arg (last_stmt, 1);
1044 if (TREE_CODE (exp) != REAL_CST
1045 && TREE_CODE (exp) != INTEGER_CST)
1046 return NULL;
1047 break;
1049 default:
1050 return NULL;
1053 /* We now have a pow or powi builtin function call with a constant
1054 exponent. */
1056 *type_out = NULL_TREE;
1058 /* Catch squaring. */
1059 if ((tree_fits_shwi_p (exp)
1060 && tree_to_shwi (exp) == 2)
1061 || (TREE_CODE (exp) == REAL_CST
1062 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1064 *type_in = TREE_TYPE (base);
1066 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1067 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1068 return stmt;
1071 /* Catch square root. */
1072 if (TREE_CODE (exp) == REAL_CST
1073 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1075 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1076 if (*type_in
1077 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1078 OPTIMIZE_FOR_SPEED))
1080 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1081 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1082 gimple_call_set_lhs (stmt, var);
1083 return stmt;
1087 return NULL;
1091 /* Function vect_recog_widen_sum_pattern
1093 Try to find the following pattern:
1095 type x_t;
1096 TYPE x_T, sum = init;
1097 loop:
1098 sum_0 = phi <init, sum_1>
1099 S1 x_t = *p;
1100 S2 x_T = (TYPE) x_t;
1101 S3 sum_1 = x_T + sum_0;
1103 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1104 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1105 a special case of a reduction computation.
1107 Input:
1109 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1110 when this function is called with S3, the pattern {S2,S3} will be detected.
1112 Output:
1114 * TYPE_IN: The type of the input arguments to the pattern.
1116 * TYPE_OUT: The type of the output of this pattern.
1118 * Return value: A new stmt that will be used to replace the sequence of
1119 stmts that constitute the pattern. In this case it will be:
1120 WIDEN_SUM <x_t, sum_0>
1122 Note: The widening-sum idiom is a widening reduction pattern that is
1123 vectorized without preserving all the intermediate results. It
1124 produces only N/2 (widened) results (by summing up pairs of
1125 intermediate results) rather than all N results. Therefore, we
1126 cannot allow this pattern when we want to get all the results and in
1127 the correct order (as is the case when this computation is in an
1128 inner-loop nested in an outer-loop that is being vectorized). */
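/* A hypothetical scalar reduction loop (illustrative sketch only, not
   part of GCC) matching the pattern above, assuming 16-bit short and
   32-bit int.  Kept under #if 0 so it is never compiled.  */
#if 0
static int
widen_sum_example (const short *x, int n)
{
  int sum = 0;			/* TYPE sum = init;  */
  for (int i = 0; i < n; i++)
    /* S2: x_T = (int) x_t; S3: accumulate.
       Becomes WIDEN_SUM <x_t, sum_0>.  */
    sum += x[i];
  return sum;
}
#endif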
1130 static gimple *
1131 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1132 tree *type_out)
1134 gimple *stmt, *last_stmt = (*stmts)[0];
1135 tree oprnd0, oprnd1;
1136 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1137 tree type, half_type;
1138 gimple *pattern_stmt;
1139 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1140 struct loop *loop;
1141 tree var;
1142 bool promotion;
1144 if (!loop_info)
1145 return NULL;
1147 loop = LOOP_VINFO_LOOP (loop_info);
1149 /* We don't allow changing the order of the computation in the inner-loop
1150 when doing outer-loop vectorization. */
1151 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1152 return NULL;
1154 if (!is_gimple_assign (last_stmt))
1155 return NULL;
1157 type = gimple_expr_type (last_stmt);
1159 /* Look for the following pattern
1160 DX = (TYPE) X;
1161 sum_1 = DX + sum_0;
1162 In which DX is at least double the size of X, and sum_1 has been
1163 recognized as a reduction variable.
1164 */
1166 /* Starting from LAST_STMT, follow the defs of its uses in search
1167 of the above pattern. */
1169 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1170 return NULL;
1172 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
1173 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
1174 return NULL;
1176 oprnd0 = gimple_assign_rhs1 (last_stmt);
1177 oprnd1 = gimple_assign_rhs2 (last_stmt);
1178 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1179 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1180 return NULL;
1182 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1183 we know that oprnd1 is the reduction variable (defined by a loop-header
1184 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1185 Left to check that oprnd0 is defined by a cast from type 'type' to type
1186 'TYPE'. */
1188 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1189 &promotion)
1190 || !promotion)
1191 return NULL;
1193 oprnd0 = gimple_assign_rhs1 (stmt);
1194 *type_in = half_type;
1195 *type_out = type;
1197 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1198 var = vect_recog_temp_ssa_var (type, NULL);
1199 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1201 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_recog_widen_sum_pattern: detected: ");
1205 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1206 dump_printf (MSG_NOTE, "\n");
1209 return pattern_stmt;
1213 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1215 Input:
1216 STMT - a statement to check.
1217 DEF - we support operations with two operands, one of which is constant.
1218 The other operand can be defined by a demotion operation, or by a
1219 previous statement in a sequence of over-promoted operations. In the
1220 latter case DEF is used to replace that operand. (It is defined by a
1221 pattern statement we created for the previous statement in the
1222 sequence).
1224 Input/output:
1225 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1226 NULL, it's the type of DEF.
1227 STMTS - additional pattern statements. If a pattern statement (type
1228 conversion) is created in this function, its original statement is
1229 added to STMTS.
1231 Output:
1232 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1233 operands to use in the new pattern statement for STMT (will be created
1234 in vect_recog_over_widening_pattern ()).
1235 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1236 statements for STMT: the first one is a type promotion and the second
1237 one is the operation itself. We return the type promotion statement
1238 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1239 the second pattern statement. */
1241 static bool
1242 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1243 tree *op0, tree *op1, gimple **new_def_stmt,
1244 vec<gimple *> *stmts)
1246 enum tree_code code;
1247 tree const_oprnd, oprnd;
1248 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1249 gimple *def_stmt, *new_stmt;
1250 bool first = false;
1251 bool promotion;
1253 *op0 = NULL_TREE;
1254 *op1 = NULL_TREE;
1255 *new_def_stmt = NULL;
1257 if (!is_gimple_assign (stmt))
1258 return false;
1260 code = gimple_assign_rhs_code (stmt);
1261 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1262 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1263 return false;
1265 oprnd = gimple_assign_rhs1 (stmt);
1266 const_oprnd = gimple_assign_rhs2 (stmt);
1267 type = gimple_expr_type (stmt);
1269 if (TREE_CODE (oprnd) != SSA_NAME
1270 || TREE_CODE (const_oprnd) != INTEGER_CST)
1271 return false;
1273 /* If oprnd has other uses besides that in stmt we cannot mark it
1274 as being part of a pattern only. */
1275 if (!has_single_use (oprnd))
1276 return false;
1278 /* If we are in the middle of a sequence, we use DEF from a previous
1279 statement. Otherwise, OPRND has to be a result of type promotion. */
1280 if (*new_type)
1282 half_type = *new_type;
1283 oprnd = def;
1285 else
1287 first = true;
1288 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1289 &promotion)
1290 || !promotion
1291 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1292 return false;
1295 /* Can we perform the operation on a smaller type? */
1296 switch (code)
1298 case BIT_IOR_EXPR:
1299 case BIT_XOR_EXPR:
1300 case BIT_AND_EXPR:
1301 if (!int_fits_type_p (const_oprnd, half_type))
1303 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1304 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1305 return false;
1307 interm_type = build_nonstandard_integer_type (
1308 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1309 if (!int_fits_type_p (const_oprnd, interm_type))
1310 return false;
1313 break;
1315 case LSHIFT_EXPR:
1316 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1317 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1318 return false;
1320 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1321 (e.g., if the original value was char, the shift amount is at most 8
1322 if we want to use short). */
1323 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1324 return false;
1326 interm_type = build_nonstandard_integer_type (
1327 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1329 if (!vect_supportable_shift (code, interm_type))
1330 return false;
1332 break;
1334 case RSHIFT_EXPR:
1335 if (vect_supportable_shift (code, half_type))
1336 break;
1338 /* Try intermediate type - HALF_TYPE is not supported. */
1339 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1340 return false;
1342 interm_type = build_nonstandard_integer_type (
1343 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1345 if (!vect_supportable_shift (code, interm_type))
1346 return false;
1348 break;
1350 default:
1351 gcc_unreachable ();
1354 /* There are four possible cases:
1355 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1356 the first statement in the sequence)
1357 a. The original, HALF_TYPE, is not enough - we replace the promotion
1358 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1359 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1360 promotion.
1361 2. OPRND is defined by a pattern statement we created.
1362 a. Its type is not sufficient for the operation, we create a new stmt:
1363 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1364 this statement in NEW_DEF_STMT, and it is later put in
1365 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1366 b. OPRND is good to use in the new statement. */
1367 if (first)
1369 if (interm_type)
1371 /* Replace the original type conversion HALF_TYPE->TYPE with
1372 HALF_TYPE->INTERM_TYPE. */
1373 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1375 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1376 /* Check if the already created pattern stmt is what we need. */
1377 if (!is_gimple_assign (new_stmt)
1378 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1379 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1380 return false;
1382 stmts->safe_push (def_stmt);
1383 oprnd = gimple_assign_lhs (new_stmt);
1385 else
1387 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1388 oprnd = gimple_assign_rhs1 (def_stmt);
1389 new_oprnd = make_ssa_name (interm_type);
1390 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1391 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1392 stmts->safe_push (def_stmt);
1393 oprnd = new_oprnd;
1396 else
1398 /* Retrieve the operand before the type promotion. */
1399 oprnd = gimple_assign_rhs1 (def_stmt);
1402 else
1404 if (interm_type)
1406 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1407 new_oprnd = make_ssa_name (interm_type);
1408 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1409 oprnd = new_oprnd;
1410 *new_def_stmt = new_stmt;
1413 /* Otherwise, OPRND is already set. */
1416 if (interm_type)
1417 *new_type = interm_type;
1418 else
1419 *new_type = half_type;
1421 *op0 = oprnd;
1422 *op1 = fold_convert (*new_type, const_oprnd);
1424 return true;
1428 /* Try to find a statement or a sequence of statements that can be performed
1429 on a smaller type:
1431 type x_t;
1432 TYPE x_T, res0_T, res1_T;
1433 loop:
1434 S1 x_t = *p;
1435 S2 x_T = (TYPE) x_t;
1436 S3 res0_T = op (x_T, C0);
1437 S4 res1_T = op (res0_T, C1);
1438 S5 ... = () res1_T; - type demotion
1440 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1441 constants.
1442 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1443 be 'type' or some intermediate type. For now, we expect S5 to be a type
1444 demotion operation. We also check that S3 and S4 have only one use. */
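/* A hypothetical over-widened loop (illustrative sketch only, not part
   of GCC): C integer promotion performs the shift and bitwise OR in
   int, but the result is demoted back to unsigned char on the store, so
   the operations fit a narrower type.  Kept under #if 0 so it is never
   compiled.  */
#if 0
static void
over_widening_example (const unsigned char *p, unsigned char *q, int n)
{
  for (int i = 0; i < n; i++)
    /* S2: promote to int; S3: shift; S4: OR; S5: demote on the store.  */
    q[i] = (p[i] << 1) | 1;
}
#endif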
1446 static gimple *
1447 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1448 tree *type_in, tree *type_out)
1450 gimple *stmt = stmts->pop ();
1451 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1452 *use_stmt = NULL;
1453 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1454 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1455 bool first;
1456 tree type = NULL;
1458 first = true;
1459 while (1)
1461 if (!vinfo_for_stmt (stmt)
1462 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1463 return NULL;
1465 new_def_stmt = NULL;
1466 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1467 &op0, &op1, &new_def_stmt,
1468 stmts))
1470 if (first)
1471 return NULL;
1472 else
1473 break;
1476 /* STMT can be performed on a smaller type. Check its uses. */
1477 use_stmt = vect_single_imm_use (stmt);
1478 if (!use_stmt || !is_gimple_assign (use_stmt))
1479 return NULL;
1481 /* Create pattern statement for STMT. */
1482 vectype = get_vectype_for_scalar_type (new_type);
1483 if (!vectype)
1484 return NULL;
1486 /* We want to collect all the statements for which we create pattern
1487 statements, except for the case when the last statement in the
1488 sequence doesn't have a corresponding pattern statement. In such
1489 case we associate the last pattern statement with the last statement
1490 in the sequence. Therefore, we only add the original statement to
1491 the list if we know that it is not the last. */
1492 if (prev_stmt)
1493 stmts->safe_push (prev_stmt);
1495 var = vect_recog_temp_ssa_var (new_type, NULL);
1496 pattern_stmt
1497 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1498 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1499 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1501 if (dump_enabled_p ())
1503 dump_printf_loc (MSG_NOTE, vect_location,
1504 "created pattern stmt: ");
1505 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1506 dump_printf (MSG_NOTE, "\n");
1509 type = gimple_expr_type (stmt);
1510 prev_stmt = stmt;
1511 stmt = use_stmt;
1513 first = false;
1516 /* We got a sequence. We expect it to end with a type demotion operation.
1517 Otherwise, we quit (for now). There are three possible cases: the
1518 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1519 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1520 NEW_TYPE differs (we create a new conversion statement). */
1521 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1523 use_lhs = gimple_assign_lhs (use_stmt);
1524 use_type = TREE_TYPE (use_lhs);
1525 /* Support only type demotion or signedness change. */
1526 if (!INTEGRAL_TYPE_P (use_type)
1527 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1528 return NULL;
1530 /* Check that NEW_TYPE is not bigger than the conversion result. */
1531 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1532 return NULL;
1534 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1535 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1537 /* Create NEW_TYPE->USE_TYPE conversion. */
1538 new_oprnd = make_ssa_name (use_type);
1539 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1540 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1542 *type_in = get_vectype_for_scalar_type (new_type);
1543 *type_out = get_vectype_for_scalar_type (use_type);
1545 /* We created a pattern statement for the last statement in the
1546 sequence, so we don't need to associate it with the pattern
1547 statement created for PREV_STMT. Therefore, we add PREV_STMT
1548 to the list in order to mark it later in vect_pattern_recog_1. */
1549 if (prev_stmt)
1550 stmts->safe_push (prev_stmt);
1552 else
1554 if (prev_stmt)
1555 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1556 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1558 *type_in = vectype;
1559 *type_out = NULL_TREE;
1562 stmts->safe_push (use_stmt);
1564 else
1565 /* TODO: support general case, create a conversion to the correct type. */
1566 return NULL;
1568 /* Pattern detected. */
1569 if (dump_enabled_p ())
1571 dump_printf_loc (MSG_NOTE, vect_location,
1572 "vect_recog_over_widening_pattern: detected: ");
1573 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1574 dump_printf (MSG_NOTE, "\n");
1577 return pattern_stmt;
1580 /* Detect widening shift pattern:
1582 type a_t;
1583 TYPE a_T, res_T;
1585 S1 a_t = ;
1586 S2 a_T = (TYPE) a_t;
1587 S3 res_T = a_T << CONST;
1589 where type 'TYPE' is at least double the size of type 'type'.
1591 Also detect cases where the shift result is immediately converted
1592 to another type 'result_type' that is no larger in size than 'TYPE'.
1593 In those cases we perform a widen-shift that directly results in
1594 'result_type', to avoid a possible over-widening situation:
1596 type a_t;
1597 TYPE a_T, res_T;
1598 result_type res_result;
1600 S1 a_t = ;
1601 S2 a_T = (TYPE) a_t;
1602 S3 res_T = a_T << CONST;
1603 S4 res_result = (result_type) res_T;
1604 '--> res_result' = a_t w<< CONST;
1606 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1607 create an additional pattern stmt for S2 to create a variable of an
1608 intermediate type, and perform widen-shift on the intermediate type:
1610 type a_t;
1611 interm_type a_it;
1612 TYPE a_T, res_T, res_T';
1614 S1 a_t = ;
1615 S2 a_T = (TYPE) a_t;
1616 '--> a_it = (interm_type) a_t;
1617 S3 res_T = a_T << CONST;
1618 '--> res_T' = a_it <<* CONST;
1620 Input/Output:
1622 * STMTS: Contains a stmt from which the pattern search begins.
1623 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1624 in STMTS. When an intermediate type is used and a pattern statement is
1625 created for S2, we also put S2 here (before S3).
1627 Output:
1629 * TYPE_IN: The type of the input arguments to the pattern.
1631 * TYPE_OUT: The type of the output of this pattern.
1633 * Return value: A new stmt that will be used to replace the sequence of
1634 stmts that constitute the pattern. In this case it will be:
1635 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
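/* A hypothetical scalar loop (illustrative sketch only, not part of
   GCC) matching the basic case above, assuming 16-bit short and 32-bit
   int.  Kept under #if 0 so it is never compiled.  */
#if 0
static void
widen_shift_example (const short *a, int *b, int n)
{
  for (int i = 0; i < n; i++)
    /* S2: a_T = (int) a_t; S3: shift.
       Becomes WIDEN_LSHIFT_EXPR <a_t, 8>.  */
    b[i] = a[i] << 8;
}
#endif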
1637 static gimple *
1638 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1639 tree *type_in, tree *type_out)
1641 gimple *last_stmt = stmts->pop ();
1642 gimple *def_stmt0;
1643 tree oprnd0, oprnd1;
1644 tree type, half_type0;
1645 gimple *pattern_stmt;
1646 tree vectype, vectype_out = NULL_TREE;
1647 tree var;
1648 enum tree_code dummy_code;
1649 int dummy_int;
1650 vec<tree> dummy_vec;
1651 gimple *use_stmt;
1652 bool promotion;
1654 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1655 return NULL;
1657 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1658 return NULL;
1660 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1661 return NULL;
1663 oprnd0 = gimple_assign_rhs1 (last_stmt);
1664 oprnd1 = gimple_assign_rhs2 (last_stmt);
1665 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1666 return NULL;
1668 /* Check operand 0: it has to be defined by a type promotion. */
1669 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1670 &promotion)
1671 || !promotion)
1672 return NULL;
1674 /* Check operand 1: has to be positive. We check that it fits the type
1675 in vect_handle_widen_op_by_const (). */
1676 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1677 return NULL;
1679 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1680 type = gimple_expr_type (last_stmt);
1682 /* Check for subsequent conversion to another type. */
1683 use_stmt = vect_single_imm_use (last_stmt);
1684 if (use_stmt && is_gimple_assign (use_stmt)
1685 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1686 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1688 tree use_lhs = gimple_assign_lhs (use_stmt);
1689 tree use_type = TREE_TYPE (use_lhs);
1691 if (INTEGRAL_TYPE_P (use_type)
1692 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1694 last_stmt = use_stmt;
1695 type = use_type;
1699 /* Check if this is a widening operation. */
1700 gimple *wstmt = NULL;
1701 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1702 &oprnd0, &wstmt,
1703 type, &half_type0, def_stmt0))
1704 return NULL;
1706 /* Pattern detected. */
1707 if (dump_enabled_p ())
1708 dump_printf_loc (MSG_NOTE, vect_location,
1709 "vect_recog_widen_shift_pattern: detected:\n");
1711 /* Check target support. */
1712 vectype = get_vectype_for_scalar_type (half_type0);
1713 vectype_out = get_vectype_for_scalar_type (type);
1715 if (!vectype
1716 || !vectype_out
1717 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1718 vectype_out, vectype,
1719 &dummy_code, &dummy_code,
1720 &dummy_int, &dummy_vec))
1721 return NULL;
1723 *type_in = vectype;
1724 *type_out = vectype_out;
1726 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1727 var = vect_recog_temp_ssa_var (type, NULL);
1728 pattern_stmt =
1729 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1730 if (wstmt)
1732 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1733 new_pattern_def_seq (stmt_vinfo, wstmt);
1734 stmt_vec_info new_stmt_info
1735 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1736 set_vinfo_for_stmt (wstmt, new_stmt_info);
1737 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1740 if (dump_enabled_p ())
1741 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1743 stmts->safe_push (last_stmt);
1744 return pattern_stmt;
1747 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1749 type a_t, b_t, c_t;
1751 S0 a_t = b_t r<< c_t;
1753 Input/Output:
1755 * STMTS: Contains a stmt from which the pattern search begins,
1756 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1757 with a sequence:
1759 S1 d_t = -c_t;
1760 S2 e_t = d_t & (B - 1);
1761 S3 f_t = b_t << c_t;
1762 S4 g_t = b_t >> e_t;
1763 S0 a_t = f_t | g_t;
1765 where B is the element bitsize of the type.
1767 Output:
1769 * TYPE_IN: The type of the input arguments to the pattern.
1771 * TYPE_OUT: The type of the output of this pattern.
1773 * Return value: A new stmt that will be used to replace the rotate
1774 S0 stmt. */
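/* A hypothetical rotate loop (illustrative sketch only, not part of
   GCC), assuming 32-bit unsigned int.  GIMPLE typically recognizes the
   open-coded form below as LROTATE_EXPR; this pattern then lowers it
   back to the shift/OR sequence above for targets without a vector
   rotate.  Kept under #if 0 so it is never compiled.  */
#if 0
static void
rotate_example (unsigned int *x, unsigned int c, int n)
{
  for (int i = 0; i < n; i++)
    /* x[i] r<< c, written so any c is well defined.  */
    x[i] = (x[i] << (c & 31)) | (x[i] >> ((-c) & 31));
}
#endif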
1776 static gimple *
1777 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1779 gimple *last_stmt = stmts->pop ();
1780 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1781 gimple *pattern_stmt, *def_stmt;
1782 enum tree_code rhs_code;
1783 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1784 vec_info *vinfo = stmt_vinfo->vinfo;
1785 enum vect_def_type dt;
1786 optab optab1, optab2;
1787 edge ext_def = NULL;
1789 if (!is_gimple_assign (last_stmt))
1790 return NULL;
1792 rhs_code = gimple_assign_rhs_code (last_stmt);
1793 switch (rhs_code)
1795 case LROTATE_EXPR:
1796 case RROTATE_EXPR:
1797 break;
1798 default:
1799 return NULL;
1802 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1803 return NULL;
1805 lhs = gimple_assign_lhs (last_stmt);
1806 oprnd0 = gimple_assign_rhs1 (last_stmt);
1807 type = TREE_TYPE (oprnd0);
1808 oprnd1 = gimple_assign_rhs2 (last_stmt);
1809 if (TREE_CODE (oprnd0) != SSA_NAME
1810 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1811 || !INTEGRAL_TYPE_P (type)
1812 || !TYPE_UNSIGNED (type))
1813 return NULL;
1815 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1816 return NULL;
1818 if (dt != vect_internal_def
1819 && dt != vect_constant_def
1820 && dt != vect_external_def)
1821 return NULL;
1823 vectype = get_vectype_for_scalar_type (type);
1824 if (vectype == NULL_TREE)
1825 return NULL;
1827 /* If vector/vector or vector/scalar rotate is supported by the target,
1828 don't do anything here. */
1829 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1830 if (optab1
1831 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1832 return NULL;
1834 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1836 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1837 if (optab2
1838 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1839 return NULL;
1842 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1843 don't do anything here either. */
1844 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1845 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1846 if (!optab1
1847 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1848 || !optab2
1849 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1851 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1852 return NULL;
1853 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1854 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1855 if (!optab1
1856 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1857 || !optab2
1858 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1859 return NULL;
1862 *type_in = vectype;
1863 *type_out = vectype;
1864 if (*type_in == NULL_TREE)
1865 return NULL;
1867 if (dt == vect_external_def
1868 && TREE_CODE (oprnd1) == SSA_NAME
1869 && is_a <loop_vec_info> (vinfo))
1871 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1872 ext_def = loop_preheader_edge (loop);
1873 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1875 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1876 if (bb == NULL
1877 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1878 ext_def = NULL;
1882 def = NULL_TREE;
1883 if (TREE_CODE (oprnd1) == INTEGER_CST
1884 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1885 def = oprnd1;
1886 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1888 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1889 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1890 && TYPE_PRECISION (TREE_TYPE (rhs1))
1891 == TYPE_PRECISION (type))
1892 def = rhs1;
1895 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1896 if (def == NULL_TREE)
1898 def = vect_recog_temp_ssa_var (type, NULL);
1899 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1900 if (ext_def)
1902 basic_block new_bb
1903 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1904 gcc_assert (!new_bb);
1906 else
1907 append_pattern_def_seq (stmt_vinfo, def_stmt);
1909 stype = TREE_TYPE (def);
1911 if (TREE_CODE (def) == INTEGER_CST)
1913 if (!tree_fits_uhwi_p (def)
1914 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1915 || integer_zerop (def))
1916 return NULL;
1917 def2 = build_int_cst (stype,
1918 GET_MODE_PRECISION (TYPE_MODE (type))
1919 - tree_to_uhwi (def));
1921 else
1923 tree vecstype = get_vectype_for_scalar_type (stype);
1924 stmt_vec_info def_stmt_vinfo;
1926 if (vecstype == NULL_TREE)
1927 return NULL;
1928 def2 = vect_recog_temp_ssa_var (stype, NULL);
1929 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1930 if (ext_def)
1932 basic_block new_bb
1933 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1934 gcc_assert (!new_bb);
1936 else
1938 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1939 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1940 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1941 append_pattern_def_seq (stmt_vinfo, def_stmt);
1944 def2 = vect_recog_temp_ssa_var (stype, NULL);
1945 tree mask
1946 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1947 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1948 gimple_assign_lhs (def_stmt), mask);
1949 if (ext_def)
1951 basic_block new_bb
1952 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1953 gcc_assert (!new_bb);
1955 else
1957 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1958 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1959 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1960 append_pattern_def_seq (stmt_vinfo, def_stmt);
1964 var1 = vect_recog_temp_ssa_var (type, NULL);
1965 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1966 ? LSHIFT_EXPR : RSHIFT_EXPR,
1967 oprnd0, def);
1968 append_pattern_def_seq (stmt_vinfo, def_stmt);
1970 var2 = vect_recog_temp_ssa_var (type, NULL);
1971 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1972 ? RSHIFT_EXPR : LSHIFT_EXPR,
1973 oprnd0, def2);
1974 append_pattern_def_seq (stmt_vinfo, def_stmt);
1976 /* Pattern detected. */
1977 if (dump_enabled_p ())
1978 dump_printf_loc (MSG_NOTE, vect_location,
1979 "vect_recog_rotate_pattern: detected:\n");
1981 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1982 var = vect_recog_temp_ssa_var (type, NULL);
1983 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1985 if (dump_enabled_p ())
1986 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1988 stmts->safe_push (last_stmt);
1989 return pattern_stmt;
1992 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1993 vectorized:
1995 type a_t;
1996 TYPE b_T, res_T;
1998 S1 a_t = ;
1999 S2 b_T = ;
2000 S3 res_T = b_T op a_t;
2002 where type 'TYPE' is a type with different size than 'type',
2003 and op is <<, >> or rotate.
2005 Also detect cases:
2007 type a_t;
2008 TYPE b_T, c_T, res_T;
2010 S0 c_T = ;
2011 S1 a_t = (type) c_T;
2012 S2 b_T = ;
2013 S3 res_T = b_T op a_t;
2015 Input/Output:
2017 * STMTS: Contains a stmt from which the pattern search begins,
2018 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2019 with a shift/rotate which has same type on both operands, in the
2020 second case just b_T op c_T, in the first case with added cast
2021 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2023 Output:
2025 * TYPE_IN: The type of the input arguments to the pattern.
2027 * TYPE_OUT: The type of the output of this pattern.
2029 * Return value: A new stmt that will be used to replace the shift/rotate
2030 S3 stmt. */
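/* A minimal scalar illustration of the first case (names hypothetical):

     unsigned char a_t;        // "type"
     unsigned int b_T, res_T;  // "TYPE"
     ...
     res_T = b_T << a_t;       // S3: narrower shift amount

   The pattern adds a_T = (unsigned int) a_t to STMT_VINFO_PATTERN_DEF_SEQ
   and rewrites S3 as res_T = b_T << a_T, so both operands of the vector
   shift end up with the same vector type.  */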
2032 static gimple *
2033 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2034 tree *type_in, tree *type_out)
2036 gimple *last_stmt = stmts->pop ();
2037 tree oprnd0, oprnd1, lhs, var;
2038 gimple *pattern_stmt, *def_stmt;
2039 enum tree_code rhs_code;
2040 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2041 vec_info *vinfo = stmt_vinfo->vinfo;
2042 enum vect_def_type dt;
2044 if (!is_gimple_assign (last_stmt))
2045 return NULL;
2047 rhs_code = gimple_assign_rhs_code (last_stmt);
2048 switch (rhs_code)
2050 case LSHIFT_EXPR:
2051 case RSHIFT_EXPR:
2052 case LROTATE_EXPR:
2053 case RROTATE_EXPR:
2054 break;
2055 default:
2056 return NULL;
2059 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2060 return NULL;
2062 lhs = gimple_assign_lhs (last_stmt);
2063 oprnd0 = gimple_assign_rhs1 (last_stmt);
2064 oprnd1 = gimple_assign_rhs2 (last_stmt);
2065 if (TREE_CODE (oprnd0) != SSA_NAME
2066 || TREE_CODE (oprnd1) != SSA_NAME
2067 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2068 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2069 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2070 || TYPE_PRECISION (TREE_TYPE (lhs))
2071 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2072 return NULL;
2074 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2075 return NULL;
2077 if (dt != vect_internal_def)
2078 return NULL;
2080 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2081 *type_out = *type_in;
2082 if (*type_in == NULL_TREE)
2083 return NULL;
2085 tree def = NULL_TREE;
2086 if (gimple_assign_cast_p (def_stmt))
2088 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2089 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2090 && TYPE_PRECISION (TREE_TYPE (rhs1))
2091 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2092 def = rhs1;
2095 if (def == NULL_TREE)
2097 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2098 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2099 new_pattern_def_seq (stmt_vinfo, def_stmt);
2102 /* Pattern detected. */
2103 if (dump_enabled_p ())
2104 dump_printf_loc (MSG_NOTE, vect_location,
2105 "vect_recog_vector_vector_shift_pattern: detected:\n");
2107 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2108 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2109 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2111 if (dump_enabled_p ())
2112 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2114 stmts->safe_push (last_stmt);
2115 return pattern_stmt;
2118 /* Detect multiplications by constants which are positive or negative powers
2119    of 2, and convert them to shift patterns.
2121    Mult with a constant that is a positive power of two.
2122    type a_t;
2123    type b_t;
2124    S1: b_t = a_t * n
2128    Mult with a constant that is a negative power of two.
2129 S2: b_t = a_t * -n
2131 Input/Output:
2133 STMTS: Contains a stmt from which the pattern search begins,
2134 i.e. the mult stmt.  Convert the mult operation to LSHIFT if
2135 the constant operand is a power of 2.
2136 type a_t, b_t;
2137 S1': b_t = a_t << log2 (n)
2139 Convert the mult operation to LSHIFT followed by a NEGATE
2140 if the constant operand is a negative power of 2.
2141 type a_t, b_t, res_T;
2142 S2': b_t = a_t << log2 (n)
2143 S3': res_T = - (b_t)
2145 Output:
2147 * TYPE_IN: The type of the input arguments to the pattern.
2149 * TYPE_OUT: The type of the output of this pattern.
2151 * Return value: A new stmt that will be used to replace the multiplication
2152 S1 or S2 stmt. */
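/* Concretely (illustration): with n == 8,

     b_t = a_t * 8;     becomes     b_t = a_t << 3;

   and with n == -8,

     tmp = a_t << 3;
     b_t = -tmp;

   i.e. a single LSHIFT_EXPR, plus a trailing NEGATE_EXPR in the
   negative case.  */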
2154 static gimple *
2155 vect_recog_mult_pattern (vec<gimple *> *stmts,
2156 tree *type_in, tree *type_out)
2158 gimple *last_stmt = stmts->pop ();
2159 tree oprnd0, oprnd1, vectype, itype;
2160 gimple *pattern_stmt, *def_stmt;
2161 optab optab;
2162 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2163 int power2_val, power2_neg_val;
2164 tree shift;
2166 if (!is_gimple_assign (last_stmt))
2167 return NULL;
2169 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2170 return NULL;
2172 oprnd0 = gimple_assign_rhs1 (last_stmt);
2173 oprnd1 = gimple_assign_rhs2 (last_stmt);
2174 itype = TREE_TYPE (oprnd0);
2176 if (TREE_CODE (oprnd0) != SSA_NAME
2177 || TREE_CODE (oprnd1) != INTEGER_CST
2178 || !INTEGRAL_TYPE_P (itype)
2179 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2180 return NULL;
2182 vectype = get_vectype_for_scalar_type (itype);
2183 if (vectype == NULL_TREE)
2184 return NULL;
2186 /* If the target can handle vectorized multiplication natively,
2187 don't attempt to optimize this. */
2188 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2189 if (optab != unknown_optab)
2191 machine_mode vec_mode = TYPE_MODE (vectype);
2192 int icode = (int) optab_handler (optab, vec_mode);
2193 if (icode != CODE_FOR_nothing)
2194 return NULL;
2197 /* If target cannot handle vector left shift then we cannot
2198 optimize and bail out. */
2199 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2200 if (!optab
2201 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2202 return NULL;
2204 power2_val = wi::exact_log2 (oprnd1);
2205 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2207 /* Handle constant operands that are positive or negative powers of 2.  */
2208 if (power2_val != -1)
2210 shift = build_int_cst (itype, power2_val);
2211 pattern_stmt
2212 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2213 LSHIFT_EXPR, oprnd0, shift);
2215 else if (power2_neg_val != -1)
2217 /* If the target cannot handle vector NEGATE then we cannot
2218 do the optimization. */
2219 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2220 if (!optab
2221 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2222 return NULL;
2224 shift = build_int_cst (itype, power2_neg_val);
2225 def_stmt
2226 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2227 LSHIFT_EXPR, oprnd0, shift);
2228 new_pattern_def_seq (stmt_vinfo, def_stmt);
2229 pattern_stmt
2230 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2231 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2233 else
2234 return NULL;
2236 /* Pattern detected. */
2237 if (dump_enabled_p ())
2238 dump_printf_loc (MSG_NOTE, vect_location,
2239 "vect_recog_mult_pattern: detected:\n");
2241 if (dump_enabled_p ())
2242 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2243 pattern_stmt, 0);
2245 stmts->safe_push (last_stmt);
2246 *type_in = vectype;
2247 *type_out = vectype;
2249 return pattern_stmt;
2252 /* Detect a signed division by a constant that wouldn't be
2253 otherwise vectorized:
2255 type a_t, b_t;
2257 S1 a_t = b_t / N;
2259 where type 'type' is an integral type and N is a constant.
2261 Similarly handle modulo by a constant:
2263 S4 a_t = b_t % N;
2265 Input/Output:
2267 * STMTS: Contains a stmt from which the pattern search begins,
2268 i.e. the division stmt.  If N is a power of two constant
2269 and type is signed, S1 is replaced by:
2270 S3 y_t = b_t < 0 ? N - 1 : 0;
2271 S2 x_t = b_t + y_t;
2272 S1' a_t = x_t >> log2 (N);
2274 If N is a power of two constant and type is signed, S4 is
2275 replaced by (where the *_T temporaries have unsigned type):
2276 S9 y_T = b_t < 0 ? -1U : 0U;
2277 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2278 S7 z_t = (type) z_T;
2279 S6 w_t = b_t + z_t;
2280 S5 x_t = w_t & (N - 1);
2281 S4' a_t = x_t - z_t;
2283 Output:
2285 * TYPE_IN: The type of the input arguments to the pattern.
2287 * TYPE_OUT: The type of the output of this pattern.
2289 * Return value: A new stmt that will be used to replace the division
2290 S1 or modulo S4 stmt. */
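/* Worked example (illustration): for signed int b_t and N == 4 the
   division case expands to

     y_t = b_t < 0 ? 3 : 0;    // S3: N - 1
     x_t = b_t + y_t;          // S2
     a_t = x_t >> 2;           // S1': log2 (4)

   The bias y_t makes the arithmetic right shift round towards zero,
   e.g. b_t == -5 gives x_t == -2 and a_t == -1, matching
   TRUNC_DIV_EXPR.  */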
2292 static gimple *
2293 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2294 tree *type_in, tree *type_out)
2296 gimple *last_stmt = stmts->pop ();
2297 tree oprnd0, oprnd1, vectype, itype, cond;
2298 gimple *pattern_stmt, *def_stmt;
2299 enum tree_code rhs_code;
2300 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2301 vec_info *vinfo = stmt_vinfo->vinfo;
2302 optab optab;
2303 tree q;
2304 int dummy_int, prec;
2305 stmt_vec_info def_stmt_vinfo;
2307 if (!is_gimple_assign (last_stmt))
2308 return NULL;
2310 rhs_code = gimple_assign_rhs_code (last_stmt);
2311 switch (rhs_code)
2313 case TRUNC_DIV_EXPR:
2314 case TRUNC_MOD_EXPR:
2315 break;
2316 default:
2317 return NULL;
2320 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2321 return NULL;
2323 oprnd0 = gimple_assign_rhs1 (last_stmt);
2324 oprnd1 = gimple_assign_rhs2 (last_stmt);
2325 itype = TREE_TYPE (oprnd0);
2326 if (TREE_CODE (oprnd0) != SSA_NAME
2327 || TREE_CODE (oprnd1) != INTEGER_CST
2328 || TREE_CODE (itype) != INTEGER_TYPE
2329 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2330 return NULL;
2332 vectype = get_vectype_for_scalar_type (itype);
2333 if (vectype == NULL_TREE)
2334 return NULL;
2336 /* If the target can handle vectorized division or modulo natively,
2337 don't attempt to optimize this. */
2338 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2339 if (optab != unknown_optab)
2341 machine_mode vec_mode = TYPE_MODE (vectype);
2342 int icode = (int) optab_handler (optab, vec_mode);
2343 if (icode != CODE_FOR_nothing)
2344 return NULL;
2347 prec = TYPE_PRECISION (itype);
2348 if (integer_pow2p (oprnd1))
2350 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2351 return NULL;
2353 /* Pattern detected. */
2354 if (dump_enabled_p ())
2355 dump_printf_loc (MSG_NOTE, vect_location,
2356 "vect_recog_divmod_pattern: detected:\n");
2358 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2359 build_int_cst (itype, 0));
2360 if (rhs_code == TRUNC_DIV_EXPR)
2362 tree var = vect_recog_temp_ssa_var (itype, NULL);
2363 tree shift;
2364 def_stmt
2365 = gimple_build_assign (var, COND_EXPR, cond,
2366 fold_build2 (MINUS_EXPR, itype, oprnd1,
2367 build_int_cst (itype, 1)),
2368 build_int_cst (itype, 0));
2369 new_pattern_def_seq (stmt_vinfo, def_stmt);
2370 var = vect_recog_temp_ssa_var (itype, NULL);
2371 def_stmt
2372 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2373 gimple_assign_lhs (def_stmt));
2374 append_pattern_def_seq (stmt_vinfo, def_stmt);
2376 shift = build_int_cst (itype, tree_log2 (oprnd1));
2377 pattern_stmt
2378 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2379 RSHIFT_EXPR, var, shift);
2381 else
2383 tree signmask;
2384 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2385 if (compare_tree_int (oprnd1, 2) == 0)
2387 signmask = vect_recog_temp_ssa_var (itype, NULL);
2388 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2389 build_int_cst (itype, 1),
2390 build_int_cst (itype, 0));
2391 append_pattern_def_seq (stmt_vinfo, def_stmt);
2393 else
2395 tree utype
2396 = build_nonstandard_integer_type (prec, 1);
2397 tree vecutype = get_vectype_for_scalar_type (utype);
2398 tree shift
2399 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2400 - tree_log2 (oprnd1));
2401 tree var = vect_recog_temp_ssa_var (utype, NULL);
2403 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2404 build_int_cst (utype, -1),
2405 build_int_cst (utype, 0));
2406 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2407 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2408 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2409 append_pattern_def_seq (stmt_vinfo, def_stmt);
2410 var = vect_recog_temp_ssa_var (utype, NULL);
2411 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2412 gimple_assign_lhs (def_stmt),
2413 shift);
2414 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2415 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2416 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2417 append_pattern_def_seq (stmt_vinfo, def_stmt);
2418 signmask = vect_recog_temp_ssa_var (itype, NULL);
2419 def_stmt
2420 = gimple_build_assign (signmask, NOP_EXPR, var);
2421 append_pattern_def_seq (stmt_vinfo, def_stmt);
2423 def_stmt
2424 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2425 PLUS_EXPR, oprnd0, signmask);
2426 append_pattern_def_seq (stmt_vinfo, def_stmt);
2427 def_stmt
2428 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2429 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2430 fold_build2 (MINUS_EXPR, itype, oprnd1,
2431 build_int_cst (itype, 1)));
2432 append_pattern_def_seq (stmt_vinfo, def_stmt);
2434 pattern_stmt
2435 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2436 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2437 signmask);
2440 if (dump_enabled_p ())
2441 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2442 0);
2444 stmts->safe_push (last_stmt);
2446 *type_in = vectype;
2447 *type_out = vectype;
2448 return pattern_stmt;
2451 if (prec > HOST_BITS_PER_WIDE_INT
2452 || integer_zerop (oprnd1))
2453 return NULL;
2455 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2456 return NULL;
2458 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2460 if (TYPE_UNSIGNED (itype))
2462 unsigned HOST_WIDE_INT mh, ml;
2463 int pre_shift, post_shift;
2464 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2465 & GET_MODE_MASK (TYPE_MODE (itype)));
2466 tree t1, t2, t3, t4;
2468 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2469 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2470 return NULL;
2472 /* Find a suitable multiplier and right shift count
2473 instead of multiplying with D. */
2474 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2476 /* If the suggested multiplier is more than PREC bits, we can do better
2477 for even divisors, using an initial right shift. */
2478 if (mh != 0 && (d & 1) == 0)
2480 pre_shift = floor_log2 (d & -d);
2481 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2482 &ml, &post_shift, &dummy_int);
2483 gcc_assert (!mh);
2485 else
2486 pre_shift = 0;
2488 if (mh != 0)
2490 if (post_shift - 1 >= prec)
2491 return NULL;
2493 /* t1 = oprnd0 h* ml;
2494 t2 = oprnd0 - t1;
2495 t3 = t2 >> 1;
2496 t4 = t1 + t3;
2497 q = t4 >> (post_shift - 1); */
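/* Hedged example, assuming choose_multiplier picks the canonical
   constants: unsigned 32-bit division by 7 yields ml == 0x24924925
   and post_shift == 3, so the schedule above becomes

     t1 = oprnd0 h* 0x24924925;
     t2 = oprnd0 - t1;
     t3 = t2 >> 1;
     t4 = t1 + t3;
     q  = t4 >> 2;             // post_shift - 1  */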
2498 t1 = vect_recog_temp_ssa_var (itype, NULL);
2499 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2500 build_int_cst (itype, ml));
2501 append_pattern_def_seq (stmt_vinfo, def_stmt);
2503 t2 = vect_recog_temp_ssa_var (itype, NULL);
2504 def_stmt
2505 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2506 append_pattern_def_seq (stmt_vinfo, def_stmt);
2508 t3 = vect_recog_temp_ssa_var (itype, NULL);
2509 def_stmt
2510 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2511 append_pattern_def_seq (stmt_vinfo, def_stmt);
2513 t4 = vect_recog_temp_ssa_var (itype, NULL);
2514 def_stmt
2515 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2517 if (post_shift != 1)
2519 append_pattern_def_seq (stmt_vinfo, def_stmt);
2521 q = vect_recog_temp_ssa_var (itype, NULL);
2522 pattern_stmt
2523 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2524 build_int_cst (itype, post_shift - 1));
2526 else
2528 q = t4;
2529 pattern_stmt = def_stmt;
2532 else
2534 if (pre_shift >= prec || post_shift >= prec)
2535 return NULL;
2537 /* t1 = oprnd0 >> pre_shift;
2538 t2 = t1 h* ml;
2539 q = t2 >> post_shift; */
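/* Hedged example, assuming the canonical constants: for the even
   divisor d == 12, pre_shift == 2 and the remaining division by 3
   gets ml == 0x55555556 with post_shift == 0, so

     t1 = oprnd0 >> 2;
     t2 = t1 h* 0x55555556;
     q  = t2;                  // post_shift == 0  */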
2540 if (pre_shift)
2542 t1 = vect_recog_temp_ssa_var (itype, NULL);
2543 def_stmt
2544 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2545 build_int_cst (NULL, pre_shift));
2546 append_pattern_def_seq (stmt_vinfo, def_stmt);
2548 else
2549 t1 = oprnd0;
2551 t2 = vect_recog_temp_ssa_var (itype, NULL);
2552 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2553 build_int_cst (itype, ml));
2555 if (post_shift)
2557 append_pattern_def_seq (stmt_vinfo, def_stmt);
2559 q = vect_recog_temp_ssa_var (itype, NULL);
2560 def_stmt
2561 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2562 build_int_cst (itype, post_shift));
2564 else
2565 q = t2;
2567 pattern_stmt = def_stmt;
2570 else
2572 unsigned HOST_WIDE_INT ml;
2573 int post_shift;
2574 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2575 unsigned HOST_WIDE_INT abs_d;
2576 bool add = false;
2577 tree t1, t2, t3, t4;
2579 /* Give up for -1. */
2580 if (d == -1)
2581 return NULL;
2583 /* Since d might be INT_MIN, we have to cast to
2584 unsigned HOST_WIDE_INT before negating to avoid
2585 undefined signed overflow. */
2586 abs_d = (d >= 0
2587 ? (unsigned HOST_WIDE_INT) d
2588 : - (unsigned HOST_WIDE_INT) d);
2590 /* n rem d = n rem -d */
2591 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2593 d = abs_d;
2594 oprnd1 = build_int_cst (itype, abs_d);
2596 else if (HOST_BITS_PER_WIDE_INT >= prec
2597 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2598 /* This case is not handled correctly below. */
2599 return NULL;
2601 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2602 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2604 add = true;
2605 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2607 if (post_shift >= prec)
2608 return NULL;
2610 /* t1 = oprnd0 h* ml; */
2611 t1 = vect_recog_temp_ssa_var (itype, NULL);
2612 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2613 build_int_cst (itype, ml));
2615 if (add)
2617 /* t2 = t1 + oprnd0; */
2618 append_pattern_def_seq (stmt_vinfo, def_stmt);
2619 t2 = vect_recog_temp_ssa_var (itype, NULL);
2620 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2622 else
2623 t2 = t1;
2625 if (post_shift)
2627 /* t3 = t2 >> post_shift; */
2628 append_pattern_def_seq (stmt_vinfo, def_stmt);
2629 t3 = vect_recog_temp_ssa_var (itype, NULL);
2630 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2631 build_int_cst (itype, post_shift));
2633 else
2634 t3 = t2;
2636 wide_int oprnd0_min, oprnd0_max;
2637 int msb = 1;
2638 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2640 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2641 msb = 0;
2642 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2643 msb = -1;
2646 if (msb == 0 && d >= 0)
2648 /* q = t3; */
2649 q = t3;
2650 pattern_stmt = def_stmt;
2652 else
2654 /* t4 = oprnd0 >> (prec - 1);
2655 or if we know from VRP that oprnd0 >= 0
2656 t4 = 0;
2657 or if we know from VRP that oprnd0 < 0
2658 t4 = -1; */
2659 append_pattern_def_seq (stmt_vinfo, def_stmt);
2660 t4 = vect_recog_temp_ssa_var (itype, NULL);
2661 if (msb != 1)
2662 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2663 build_int_cst (itype, msb));
2664 else
2665 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2666 build_int_cst (itype, prec - 1));
2667 append_pattern_def_seq (stmt_vinfo, def_stmt);
2669 /* q = t3 - t4; or q = t4 - t3; */
2670 q = vect_recog_temp_ssa_var (itype, NULL);
2671 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2672 d < 0 ? t3 : t4);
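/* Putting the signed path together (hedged example, assuming the
   canonical constants): for d == 3 in 32 bits, ml == 0x55555556,
   post_shift == 0 and add == false, so

     t1 = oprnd0 h* 0x55555556;    // t3 == t1 here
     t4 = oprnd0 >> 31;            // 0 or -1, the sign of oprnd0
     q  = t1 - t4;                 // d > 0, so q = t3 - t4  */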
2676 if (rhs_code == TRUNC_MOD_EXPR)
2678 tree r, t1;
2680 /* We divided. Now finish by:
2681 t1 = q * oprnd1;
2682 r = oprnd0 - t1; */
2683 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2685 t1 = vect_recog_temp_ssa_var (itype, NULL);
2686 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2687 append_pattern_def_seq (stmt_vinfo, def_stmt);
2689 r = vect_recog_temp_ssa_var (itype, NULL);
2690 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2693 /* Pattern detected. */
2694 if (dump_enabled_p ())
2696 dump_printf_loc (MSG_NOTE, vect_location,
2697 "vect_recog_divmod_pattern: detected: ");
2698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2699 dump_printf (MSG_NOTE, "\n");
2702 stmts->safe_push (last_stmt);
2704 *type_in = vectype;
2705 *type_out = vectype;
2706 return pattern_stmt;
2709 /* Function vect_recog_mixed_size_cond_pattern
2711 Try to find the following pattern:
2713 type x_t, y_t;
2714 TYPE a_T, b_T, c_T;
2715 loop:
2716 S1 a_T = x_t CMP y_t ? b_T : c_T;
2718 where type 'TYPE' is an integral type which has different size
2719 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2720 than 'type', the constants need to fit into an integer type
2721 with the same width as 'type') or results of conversion from 'type'.
2723 Input:
2725 * LAST_STMT: A stmt from which the pattern search begins.
2727 Output:
2729 * TYPE_IN: The type of the input arguments to the pattern.
2731 * TYPE_OUT: The type of the output of this pattern.
2733 * Return value: A new stmt that will be used to replace the pattern.
2734 Additionally a def_stmt is added.
2736 a_it = x_t CMP y_t ? b_it : c_it;
2737 a_T = (TYPE) a_it; */
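/* For example (illustration): with 'type' == short and 'TYPE' == int,

     short x_t, y_t;
     int a_T;
     a_T = x_t < y_t ? 17 : 42;

   is rewritten so the COND_EXPR is computed in the comparison's width
   and widened afterwards:

     short a_it = x_t < y_t ? 17 : 42;
     a_T = (int) a_it;  */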
2739 static gimple *
2740 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2741 tree *type_out)
2743 gimple *last_stmt = (*stmts)[0];
2744 tree cond_expr, then_clause, else_clause;
2745 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2746 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2747 gimple *pattern_stmt, *def_stmt;
2748 vec_info *vinfo = stmt_vinfo->vinfo;
2749 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2750 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2751 bool promotion;
2752 tree comp_scalar_type;
2754 if (!is_gimple_assign (last_stmt)
2755 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2756 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2757 return NULL;
2759 cond_expr = gimple_assign_rhs1 (last_stmt);
2760 then_clause = gimple_assign_rhs2 (last_stmt);
2761 else_clause = gimple_assign_rhs3 (last_stmt);
2763 if (!COMPARISON_CLASS_P (cond_expr))
2764 return NULL;
2766 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2767 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2768 if (comp_vectype == NULL_TREE)
2769 return NULL;
2771 type = gimple_expr_type (last_stmt);
2772 if (types_compatible_p (type, comp_scalar_type)
2773 || ((TREE_CODE (then_clause) != INTEGER_CST
2774 || TREE_CODE (else_clause) != INTEGER_CST)
2775 && !INTEGRAL_TYPE_P (comp_scalar_type))
2776 || !INTEGRAL_TYPE_P (type))
2777 return NULL;
2779 if ((TREE_CODE (then_clause) != INTEGER_CST
2780 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2781 &def_stmt0, &promotion))
2782 || (TREE_CODE (else_clause) != INTEGER_CST
2783 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2784 &def_stmt1, &promotion)))
2785 return NULL;
2787 if (orig_type0 && orig_type1
2788 && !types_compatible_p (orig_type0, orig_type1))
2789 return NULL;
2791 if (orig_type0)
2793 if (!types_compatible_p (orig_type0, comp_scalar_type))
2794 return NULL;
2795 then_clause = gimple_assign_rhs1 (def_stmt0);
2796 itype = orig_type0;
2799 if (orig_type1)
2801 if (!types_compatible_p (orig_type1, comp_scalar_type))
2802 return NULL;
2803 else_clause = gimple_assign_rhs1 (def_stmt1);
2804 itype = orig_type1;
2808 HOST_WIDE_INT cmp_mode_size
2809 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2811 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2812 return NULL;
2814 vectype = get_vectype_for_scalar_type (type);
2815 if (vectype == NULL_TREE)
2816 return NULL;
2818 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2819 return NULL;
2821 if (itype == NULL_TREE)
2822 itype = build_nonstandard_integer_type (cmp_mode_size,
2823 TYPE_UNSIGNED (type));
2825 if (itype == NULL_TREE
2826 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2827 return NULL;
2829 vecitype = get_vectype_for_scalar_type (itype);
2830 if (vecitype == NULL_TREE)
2831 return NULL;
2833 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2834 return NULL;
2836 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2838 if ((TREE_CODE (then_clause) == INTEGER_CST
2839 && !int_fits_type_p (then_clause, itype))
2840 || (TREE_CODE (else_clause) == INTEGER_CST
2841 && !int_fits_type_p (else_clause, itype)))
2842 return NULL;
2845 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2846 COND_EXPR, unshare_expr (cond_expr),
2847 fold_convert (itype, then_clause),
2848 fold_convert (itype, else_clause));
2849 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2850 NOP_EXPR, gimple_assign_lhs (def_stmt));
2852 new_pattern_def_seq (stmt_vinfo, def_stmt);
2853 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2854 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2855 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2856 *type_in = vecitype;
2857 *type_out = vectype;
2859 if (dump_enabled_p ())
2860 dump_printf_loc (MSG_NOTE, vect_location,
2861 "vect_recog_mixed_size_cond_pattern: detected:\n");
2863 return pattern_stmt;
2867 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2868 true if bool VAR can and should be optimized that way. Assume it shouldn't
2869 in case it's a result of a comparison which can be directly vectorized into
2870 a vector comparison. */
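/* E.g. (illustration) for

     bool b_1 = x_2 < y_3;
     bool b_4 = x_5 > y_6;
     bool b_7 = b_1 & b_4;

   check_bool_pattern (b_7, vinfo) recurses through the BIT_AND_EXPR
   to the two comparisons and returns true only if each of them can be
   rewritten as a COND_EXPR computing 0/1 on a supported vector type
   and is not already vectorizable as a direct vector comparison.  */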
2872 static bool
2873 check_bool_pattern (tree var, vec_info *vinfo)
2875 gimple *def_stmt;
2876 enum vect_def_type dt;
2877 tree rhs1;
2878 enum tree_code rhs_code;
2880 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2881 return false;
2883 if (dt != vect_internal_def)
2884 return false;
2886 if (!is_gimple_assign (def_stmt))
2887 return false;
2889 if (!has_single_use (var))
2890 return false;
2892 rhs1 = gimple_assign_rhs1 (def_stmt);
2893 rhs_code = gimple_assign_rhs_code (def_stmt);
2894 switch (rhs_code)
2896 case SSA_NAME:
2897 return check_bool_pattern (rhs1, vinfo);
2899 CASE_CONVERT:
2900 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2901 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2902 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2903 return false;
2904 return check_bool_pattern (rhs1, vinfo);
2906 case BIT_NOT_EXPR:
2907 return check_bool_pattern (rhs1, vinfo);
2909 case BIT_AND_EXPR:
2910 case BIT_IOR_EXPR:
2911 case BIT_XOR_EXPR:
2912 if (!check_bool_pattern (rhs1, vinfo))
2913 return false;
2914 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2916 default:
2917 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2919 tree vecitype, comp_vectype, mask_type;
2921 /* If the comparison can throw, then is_gimple_condexpr will be
2922 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2923 if (stmt_could_throw_p (def_stmt))
2924 return false;
2926 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2927 if (comp_vectype == NULL_TREE)
2928 return false;
2930 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2931 if (mask_type
2932 && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2933 return false;
2935 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2937 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2938 tree itype
2939 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2940 vecitype = get_vectype_for_scalar_type (itype);
2941 if (vecitype == NULL_TREE)
2942 return false;
2944 else
2945 vecitype = comp_vectype;
2946 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2948 return false;
2953 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2954 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2955 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2957 static tree
2958 adjust_bool_pattern_cast (tree type, tree var)
2960 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2961 gimple *cast_stmt, *pattern_stmt;
2963 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2964 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2965 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2966 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2967 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2968 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2969 return gimple_assign_lhs (cast_stmt);
2973 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2974 recursively. VAR is an SSA_NAME that should be transformed from bool
2975 to a wider integer type, OUT_TYPE is the desired final integer type of
2976 the whole pattern, TRUEVAL should be NULL unless optimizing
2977 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2978 in the then_clause, STMTS is where statements with added pattern stmts
2979 should be pushed to. */
2981 static tree
2982 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2983 vec<gimple *> *stmts)
2985 gimple *stmt = SSA_NAME_DEF_STMT (var);
2986 enum tree_code rhs_code, def_rhs_code;
2987 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2988 location_t loc;
2989 gimple *pattern_stmt, *def_stmt;
2991 rhs1 = gimple_assign_rhs1 (stmt);
2992 rhs2 = gimple_assign_rhs2 (stmt);
2993 rhs_code = gimple_assign_rhs_code (stmt);
2994 loc = gimple_location (stmt);
2995 switch (rhs_code)
2997 case SSA_NAME:
2998 CASE_CONVERT:
2999 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3000 itype = TREE_TYPE (irhs1);
3001 pattern_stmt
3002 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3003 SSA_NAME, irhs1);
3004 break;
3006 case BIT_NOT_EXPR:
3007 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3008 itype = TREE_TYPE (irhs1);
3009 pattern_stmt
3010 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3011 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3012 break;
3014 case BIT_AND_EXPR:
3015 /* Try to optimize x = y & (a < b ? 1 : 0); into
3016 x = (a < b ? y : 0);
3018 E.g. for:
3019 bool a_b, b_b, c_b;
3020 TYPE d_T;
3022 S1 a_b = x1 CMP1 y1;
3023 S2 b_b = x2 CMP2 y2;
3024 S3 c_b = a_b & b_b;
3025 S4 d_T = (TYPE) c_b;
3027 we would normally emit:
3029 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3030 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3031 S3' c_T = a_T & b_T;
3032 S4' d_T = c_T;
3034 but we can save one stmt by using the
3035 result of one of the COND_EXPRs in the other COND_EXPR and leave
3036 BIT_AND_EXPR stmt out:
3038 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3039 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3040 S4' f_T = c_T;
3042 At least when VEC_COND_EXPR is implemented using masks,
3043 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3044 computes the comparison mask and ANDs it, in one case with an
3045 all-ones vector, in the other case with a vector register.
3046 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3047 often more expensive. */
3048 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3049 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3050 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3052 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3053 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3054 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3055 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3057 gimple *tstmt;
3058 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3059 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3060 tstmt = stmts->pop ();
3061 gcc_assert (tstmt == def_stmt);
3062 stmts->quick_push (stmt);
3063 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3064 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3065 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3066 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3067 return irhs2;
3069 else
3070 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3071 goto and_ior_xor;
3073 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3074 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3075 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3077 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3078 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3079 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3080 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3082 gimple *tstmt;
3083 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3084 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3085 tstmt = stmts->pop ();
3086 gcc_assert (tstmt == def_stmt);
3087 stmts->quick_push (stmt);
3088 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3089 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3090 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3091 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3092 return irhs1;
3094 else
3095 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3096 goto and_ior_xor;
3098 /* FALLTHRU */
3099 case BIT_IOR_EXPR:
3100 case BIT_XOR_EXPR:
3101 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3102 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3103 and_ior_xor:
3104 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3105 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3107 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3108 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3109 int out_prec = TYPE_PRECISION (out_type);
3110 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3111 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3112 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3113 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3114 else
3116 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3117 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3120 itype = TREE_TYPE (irhs1);
3121 pattern_stmt
3122 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3123 rhs_code, irhs1, irhs2);
3124 break;
3126 default:
3127 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3128 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3129 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3130 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3131 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3133 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3134 itype
3135 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3137 else
3138 itype = TREE_TYPE (rhs1);
3139 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3140 if (trueval == NULL_TREE)
3141 trueval = build_int_cst (itype, 1);
3142 else
3143 gcc_checking_assert (useless_type_conversion_p (itype,
3144 TREE_TYPE (trueval)));
3145 pattern_stmt
3146 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3147 COND_EXPR, cond_expr, trueval,
3148 build_int_cst (itype, 0));
3149 break;
3152 stmts->safe_push (stmt);
3153 gimple_set_location (pattern_stmt, loc);
3154 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3155 return gimple_assign_lhs (pattern_stmt);
3159 /* Return the proper type for converting bool VAR into
3160 an integer value or NULL_TREE if no such type exists.
3161 The type is chosen so that converted value has the
3162 same number of elements as VAR's vector type. */
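/* E.g. (illustration): if VAR is defined as

     bool b_1 = d_2 < d_3;     // d_2, d_3 of type double

   then, on a target that supports the comparison as a vector mask
   operation, the returned type is a 64-bit unsigned integer type, so
   a vector of converted values has as many elements as a vector of
   doubles.  */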
3164 static tree
3165 search_type_for_mask (tree var, vec_info *vinfo)
3167 gimple *def_stmt;
3168 enum vect_def_type dt;
3169 tree rhs1;
3170 enum tree_code rhs_code;
3171 tree res = NULL_TREE, res2;
3173 if (TREE_CODE (var) != SSA_NAME)
3174 return NULL_TREE;
3176 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3177 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3178 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3179 return NULL_TREE;
3181 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3182 return NULL_TREE;
3184 if (dt != vect_internal_def)
3185 return NULL_TREE;
3187 if (!is_gimple_assign (def_stmt))
3188 return NULL_TREE;
3190 rhs_code = gimple_assign_rhs_code (def_stmt);
3191 rhs1 = gimple_assign_rhs1 (def_stmt);
3193 switch (rhs_code)
3195 case SSA_NAME:
3196 case BIT_NOT_EXPR:
3197 CASE_CONVERT:
3198 res = search_type_for_mask (rhs1, vinfo);
3199 break;
3201 case BIT_AND_EXPR:
3202 case BIT_IOR_EXPR:
3203 case BIT_XOR_EXPR:
3204 res = search_type_for_mask (rhs1, vinfo);
3205 res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
3206 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3207 res = res2;
3208 break;
3210 default:
3211 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3213 tree comp_vectype, mask_type;
3215 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3216 if (comp_vectype == NULL_TREE)
3217 return NULL_TREE;
3219 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3220 if (!mask_type
3221 || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
3222 return NULL_TREE;
3224 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3225 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3227 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3228 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3230 else
3231 res = TREE_TYPE (rhs1);
3235 return res;
3239 /* Function vect_recog_bool_pattern
3241 Try to find pattern like following:
3243 bool a_b, b_b, c_b, d_b, e_b;
3244 TYPE f_T;
3245 loop:
3246 S1 a_b = x1 CMP1 y1;
3247 S2 b_b = x2 CMP2 y2;
3248 S3 c_b = a_b & b_b;
3249 S4 d_b = x3 CMP3 y3;
3250 S5 e_b = c_b | d_b;
3251 S6 f_T = (TYPE) e_b;
3253 where type 'TYPE' is an integral type. Or a similar pattern
3254 ending in
3256 S6 f_Y = e_b ? r_Y : s_Y;
3258 as results from if-conversion of a complex condition.
3260 Input:
3262 * LAST_STMT: A stmt at the end from which the pattern
3263 search begins, i.e. cast of a bool to
3264 an integer type.
3266 Output:
3268 * TYPE_IN: The type of the input arguments to the pattern.
3270 * TYPE_OUT: The type of the output of this pattern.
3272 * Return value: A new stmt that will be used to replace the pattern.
3274 Assuming size of TYPE is the same as size of all comparisons
3275 (otherwise some casts would be added where needed), for the
3276 above sequence we create related pattern stmts:
3277 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3278 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3279 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3280 S5' e_T = c_T | d_T;
3281 S6' f_T = e_T;
3283 Instead of the above S3' we could emit:
3284 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3285 S3' c_T = a_T | b_T;
3286 but the above is more efficient. */
3288 static gimple *
3289 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3290 tree *type_out)
3292 gimple *last_stmt = stmts->pop ();
3293 enum tree_code rhs_code;
3294 tree var, lhs, rhs, vectype;
3295 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3296 stmt_vec_info new_stmt_info;
3297 vec_info *vinfo = stmt_vinfo->vinfo;
3298 gimple *pattern_stmt;
3300 if (!is_gimple_assign (last_stmt))
3301 return NULL;
3303 var = gimple_assign_rhs1 (last_stmt);
3304 lhs = gimple_assign_lhs (last_stmt);
3306 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3307 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3308 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3309 return NULL;
3311 rhs_code = gimple_assign_rhs_code (last_stmt);
3312 if (CONVERT_EXPR_CODE_P (rhs_code))
3314 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3315 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3316 return NULL;
3317 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3318 if (vectype == NULL_TREE)
3319 return NULL;
3321 if (check_bool_pattern (var, vinfo))
3323 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3324 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3325 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3326 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3327 else
3328 pattern_stmt
3329 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3331 else
3333 tree type = search_type_for_mask (var, vinfo);
3334 tree cst0, cst1, tmp;
3336 if (!type)
3337 return NULL;
3339 /* We may directly use the cond with a narrowed type, avoiding
3340    multiple cond exprs with subsequent result packing, and
3341    instead perform a single cond with a packed mask.  In the
3342    widening case it is better to do the cond first and then
3343    extract the results.  */
3344 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3345 type = TREE_TYPE (lhs);
3347 cst0 = build_int_cst (type, 0);
3348 cst1 = build_int_cst (type, 1);
3349 tmp = vect_recog_temp_ssa_var (type, NULL);
3350 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3352 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3354 tree new_vectype = get_vectype_for_scalar_type (type);
3355 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3356 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3357 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3358 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3360 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3361 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3365 *type_out = vectype;
3366 *type_in = vectype;
3367 stmts->safe_push (last_stmt);
3368 if (dump_enabled_p ())
3369 dump_printf_loc (MSG_NOTE, vect_location,
3370 "vect_recog_bool_pattern: detected:\n");
3372 return pattern_stmt;
3374 else if (rhs_code == COND_EXPR
3375 && TREE_CODE (var) == SSA_NAME)
3377 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3378 if (vectype == NULL_TREE)
3379 return NULL;
3381 /* Build a scalar type for the boolean result that when
3382 vectorized matches the vector type of the result in
3383 size and number of elements. */
3384 unsigned prec
3385 = wi::udiv_trunc (TYPE_SIZE (vectype),
3386 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3387 tree type
3388 = build_nonstandard_integer_type (prec,
3389 TYPE_UNSIGNED (TREE_TYPE (var)));
3390 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3391 return NULL;
3393 if (!check_bool_pattern (var, vinfo))
3394 return NULL;
3396 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3398 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3399 pattern_stmt
3400 = gimple_build_assign (lhs, COND_EXPR,
3401 build2 (NE_EXPR, boolean_type_node,
3402 rhs, build_int_cst (type, 0)),
3403 gimple_assign_rhs2 (last_stmt),
3404 gimple_assign_rhs3 (last_stmt));
3405 *type_out = vectype;
3406 *type_in = vectype;
3407 stmts->safe_push (last_stmt);
3408 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_NOTE, vect_location,
3410 "vect_recog_bool_pattern: detected:\n");
3412 return pattern_stmt;
3414 else if (rhs_code == SSA_NAME
3415 && STMT_VINFO_DATA_REF (stmt_vinfo))
3417 stmt_vec_info pattern_stmt_info;
3418 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3419 gcc_assert (vectype != NULL_TREE);
3420 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3421 return NULL;
3423 if (check_bool_pattern (var, vinfo))
3424 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
3425 NULL_TREE, stmts);
3426 else
3428 tree type = search_type_for_mask (var, vinfo);
3429 tree cst0, cst1, new_vectype;
3431 if (!type)
3432 return NULL;
3434 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3435 type = TREE_TYPE (vectype);
3437 cst0 = build_int_cst (type, 0);
3438 cst1 = build_int_cst (type, 1);
3439 new_vectype = get_vectype_for_scalar_type (type);
3441 rhs = vect_recog_temp_ssa_var (type, NULL);
3442 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3444 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3445 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3446 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3447 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3450 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3451 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3453 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3454 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3455 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3456 rhs = rhs2;
3458 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3459 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3460 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3461 STMT_VINFO_DATA_REF (pattern_stmt_info)
3462 = STMT_VINFO_DATA_REF (stmt_vinfo);
3463 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3464 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3465 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3466 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3467 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3468 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3469 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3470 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3471 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3472 *type_out = vectype;
3473 *type_in = vectype;
3474 stmts->safe_push (last_stmt);
3475 if (dump_enabled_p ())
3476 dump_printf_loc (MSG_NOTE, vect_location,
3477 "vect_recog_bool_pattern: detected:\n");
3478 return pattern_stmt;
3480 else
3481 return NULL;
3485 /* A helper for vect_recog_mask_conversion_pattern. Build
3486 conversion of MASK to a type suitable for masking VECTYPE.
3487 The built statement gets the required vectype and is appended
3488 to the pattern def sequence of STMT_VINFO.
3490 Return converted mask. */
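/* Usage sketch (illustration): for a mask m_1 and a VECTYPE with a
   different element width,

     tmp = build_mask_conversion (m_1, vectype, stmt_vinfo, vinfo);

   returns a fresh SSA name defined by tmp = (CONVERT) m_1, whose
   truth vector type has the same number of elements as VECTYPE.  */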
3492 static tree
3493 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3494 vec_info *vinfo)
3496 gimple *stmt;
3497 tree masktype, tmp;
3498 stmt_vec_info new_stmt_info;
3500 masktype = build_same_sized_truth_vector_type (vectype);
3501 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3502 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3503 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3504 set_vinfo_for_stmt (stmt, new_stmt_info);
3505 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3506 append_pattern_def_seq (stmt_vinfo, stmt);
3508 return tmp;
3512 /* Function vect_recog_mask_conversion_pattern
3514 Try to find statements which require boolean type
3515 conversion.  Additional conversion statements are
3516 added to handle such cases. For example:
3518 bool m_1, m_2, m_3;
3519 int i_4, i_5;
3520 double d_6, d_7;
3521 char c_1, c_2, c_3;
3523 S1 m_1 = i_4 > i_5;
3524 S2 m_2 = d_6 < d_7;
3525 S3 m_3 = m_1 & m_2;
3526 S4 c_1 = m_3 ? c_2 : c_3;
3528 Will be transformed into:
3530 S1 m_1 = i_4 > i_5;
3531 S2 m_2 = d_6 < d_7;
3532 S3'' m_2' = (_Bool[bitsize=32])m_2
3533 S3' m_3' = m_1 & m_2';
3534 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3535 S4' c_1' = m_3'' ? c_2 : c_3; */
3537 static gimple *
3538 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3539 tree *type_out)
3541 gimple *last_stmt = stmts->pop ();
3542 enum tree_code rhs_code;
3543 tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
3544 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3545 stmt_vec_info pattern_stmt_info;
3546 vec_info *vinfo = stmt_vinfo->vinfo;
3547 gimple *pattern_stmt;
3549 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
3550 if (is_gimple_call (last_stmt)
3551 && gimple_call_internal_p (last_stmt)
3552 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3553 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3555 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3557 if (load)
3559 lhs = gimple_call_lhs (last_stmt);
3560 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3562 else
3564 rhs2 = gimple_call_arg (last_stmt, 3);
3565 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3568 rhs1 = gimple_call_arg (last_stmt, 2);
3569 rhs1_type = search_type_for_mask (rhs1, vinfo);
3570 if (!rhs1_type)
3571 return NULL;
3572 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3574 if (!vectype1 || !vectype2
3575 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3576 return NULL;
3578 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3580 if (load)
3582 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3583 pattern_stmt
3584 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3585 gimple_call_arg (last_stmt, 0),
3586 gimple_call_arg (last_stmt, 1),
3587 tmp);
3588 gimple_call_set_lhs (pattern_stmt, lhs);
3590 else
3591 pattern_stmt
3592 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3593 gimple_call_arg (last_stmt, 0),
3594 gimple_call_arg (last_stmt, 1),
3595 tmp,
3596 gimple_call_arg (last_stmt, 3));
3599 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3600 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3601 STMT_VINFO_DATA_REF (pattern_stmt_info)
3602 = STMT_VINFO_DATA_REF (stmt_vinfo);
3603 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3604 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3605 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3606 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3607 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3608 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3609 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3610 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3611 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3613 *type_out = vectype1;
3614 *type_in = vectype1;
3615 stmts->safe_push (last_stmt);
3616 if (dump_enabled_p ())
3617 dump_printf_loc (MSG_NOTE, vect_location,
3618 "vect_recog_mask_conversion_pattern: detected:\n");
3620 return pattern_stmt;
3623 if (!is_gimple_assign (last_stmt))
3624 return NULL;
3626 lhs = gimple_assign_lhs (last_stmt);
3627 rhs1 = gimple_assign_rhs1 (last_stmt);
3628 rhs_code = gimple_assign_rhs_code (last_stmt);
3630 /* Check for cond expression requiring mask conversion. */
3631 if (rhs_code == COND_EXPR)
3633 /* vect_recog_mixed_size_cond_pattern could apply.
3634 Do nothing then. */
3635 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3636 return NULL;
3638 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3640 if (TREE_CODE (rhs1) == SSA_NAME)
3642 rhs1_type = search_type_for_mask (rhs1, vinfo);
3643 if (!rhs1_type)
3644 return NULL;
3646 else
3647 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3649 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3651 if (!vectype1 || !vectype2
3652 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3653 return NULL;
3655 /* If rhs1 is a comparison we need to move it into a
3656 separate statement. */
3657 if (TREE_CODE (rhs1) != SSA_NAME)
3659 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3660 pattern_stmt = gimple_build_assign (tmp, rhs1);
3661 rhs1 = tmp;
3663 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3664 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3665 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3666 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3669 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3671 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3672 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3673 gimple_assign_rhs2 (last_stmt),
3674 gimple_assign_rhs3 (last_stmt));
3676 *type_out = vectype1;
3677 *type_in = vectype1;
3678 stmts->safe_push (last_stmt);
3679 if (dump_enabled_p ())
3680 dump_printf_loc (MSG_NOTE, vect_location,
3681 "vect_recog_mask_conversion_pattern: detected:\n");
3683 return pattern_stmt;
3686 /* Now check for binary boolean operations requiring conversion for
3687 one of the operands.  */
3688 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
3689 return NULL;
3691 if (rhs_code != BIT_IOR_EXPR
3692 && rhs_code != BIT_XOR_EXPR
3693 && rhs_code != BIT_AND_EXPR)
3694 return NULL;
3696 rhs2 = gimple_assign_rhs2 (last_stmt);
3698 rhs1_type = search_type_for_mask (rhs1, vinfo);
3699 rhs2_type = search_type_for_mask (rhs2, vinfo);
3701 if (!rhs1_type || !rhs2_type
3702 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3703 return NULL;
3705 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3707 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3708 if (!vectype1)
3709 return NULL;
3710 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3712 else
3714 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3715 if (!vectype1)
3716 return NULL;
3717 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3720 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3721 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
3723 *type_out = vectype1;
3724 *type_in = vectype1;
3725 stmts->safe_push (last_stmt);
3726 if (dump_enabled_p ())
3727 dump_printf_loc (MSG_NOTE, vect_location,
3728 "vect_recog_mask_conversion_pattern: detected:\n");
3730 return pattern_stmt;
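/* Editorial illustration (an assumed example, not taken from the GCC
   sources): the binary case above covers masks of different precisions
   combined with BIT_AND_EXPR, BIT_IOR_EXPR or BIT_XOR_EXPR, e.g.

     c[i] = (a[i] > 0 && b[i] < 1.0) ? x[i] : y[i];

   with 'int' a[] and 'double' b[]: the two comparisons produce masks of
   different precisions, and the code above converts the operand with
   the higher-precision mask type to the mask type of the
   lower-precision one before rebuilding the bitwise statement.  */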
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  vec_info *vinfo = orig_stmt_info->vinfo;
  gimple *def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}
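/* After vect_mark_pattern_stmts the two statements are cross-linked by
   the assignments above:

     ORIG_STMT:    in_pattern_p = true, related_stmt = PATTERN_STMT
     PATTERN_STMT: related_stmt = ORIG_STMT, vectype = PATTERN_VECTYPE

   and every statement of the pattern def sequence is marked
   vect_internal_def with ORIG_STMT as its related statement.  */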
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */
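/* For instance (an editorial, hypothetical recognizer used only for
   illustration): a function that rewrites 2 * x into x << 1 could
   return the new shift assignment and set *TYPE_IN and *TYPE_OUT to the
   scalar type of 'x'.  Since that is not a vector type, the code below
   maps it to a vector type with get_vectype_for_scalar_type and then
   verifies through optab_for_tree_code / optab_handler that the target
   implements a vector shift for the corresponding mode.  */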
static bool
vect_pattern_recog_1 (vect_recog_func *recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple *> *stmts_to_replace)
{
  gimple *stmt = gsi_stmt (si), *pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple *next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return false;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_BOOLEAN_TYPE_P (type_in)
      || VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return false;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return false;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return false;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "%s pattern recognized: ", recog_func->name);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }

  return true;
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the stmt_vec_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	   '---> S6: a_new = ....       -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	   '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
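/* For instance (an editorial example, not taken from the GCC sources),
   the widen-mult case sketched above can come from source such as

     short a[N];
     int p[N];
     for (int i = 0; i < N; i++)
       p[i] = a[i] * 37;

   where S2 is the implicit promotion of a[i] to 'int' and S4 the 'int'
   multiplication; the pattern replaces them with a widening multiply
   from 'short', possibly via an intermediate type (S3/S5 above).  */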
void
vect_pattern_recog (vec_info *vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  auto_vec<gimple *, 1> stmts_to_replace;
  gimple *stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_pattern_recog ===\n");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
					  &stmts_to_replace))
		  break;
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
	{
	  if ((stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
				      &stmts_to_replace))
	      break;
	}
    }
}