/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "dominance.h"
#include "gimple-pretty-print.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "insn-codes.h"
#include "optabs-tree.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"
/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mult_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
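
/* For example (an illustrative sketch; the variable names are invented):
   given

     unsigned char name0;
     int name = (int) name0;

   this returns true with *ORIG_TYPE == unsigned char and
   *PROMOTION == true, because int has at least twice the precision of
   unsigned char.  With CHECK_SIGN true the same case is rejected, since
   the signedness of the two types differs.  */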
static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree dummy;
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, use_stmt, stmt_vinfo->vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, stmt_vinfo->vinfo,
			   &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
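
/* For example (an illustrative sketch; the names and types are invented):

     short x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i] * y[i];

   Here 'type' is short and both TYPE1 and TYPE2 are int: the frontend
   promotes x[i] and y[i] to int (S3/S4), multiplies (S5) and accumulates
   (S7), which is exactly the S1..S7 shape above.  */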
static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
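
/* For example (an illustrative sketch; an idiom of this shape is the core
   of a motion-estimation kernel):

     unsigned char x[16], y[16];
     int sum = 0;
     for (int i = 0; i < 16; i++)
       sum += abs (x[i] - y[i]);

   The subtraction happens after promotion to int (TYPE1), and the absolute
   value and accumulation stay in int (TYPE2 == TYPE1), giving the S1..S8
   shape above.  */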
static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
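
/* For example (an illustrative sketch): given

     unsigned char a_t;
     unsigned int a_T = (unsigned int) a_t;
     unsigned int res = a_T << 12;

   the shift amount 12 does not fit the 8-bit HALF_TYPE, but TYPE is 4 times
   bigger, so the 16-bit intermediate type works: we emit
   a_it = (unsigned short) a_t into *WSTMT and perform the widening shift
   from unsigned short instead.  */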
static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
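
/* For example (an illustrative sketch):

     short a[256], b[256];
     int c[256];
     for (int i = 0; i < 256; i++)
       c[i] = a[i] * b[i];

   Both operands are promoted to int before the multiply; the pattern
   replaces S3/S4/S5 with WIDEN_MULT <a[i], b[i]>, which can map to a
   target widening-multiply instruction.  */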
static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
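
/* For example (an illustrative sketch of the two rewrites):

     y = pow (x, 2.0);    -->   y = x * x;
     y = pow (x, 0.5);    -->   y = sqrt (x);

   The replacements are far more likely than the pow call itself to have
   vectorized forms on a given target.  */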
static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
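
/* For example (an illustrative sketch):

     unsigned char x[256];
     unsigned int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i];

   Each x[i] is promoted to unsigned int before the addition (S2); the
   pattern replaces the promote-and-add with WIDEN_SUM <x[i], sum_0>.  */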
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1
   are constants.  Check if S3 and S4 can be done on a smaller type than
   'TYPE', which can either be 'type' or some intermediate type.  For now, we
   expect S5 to be a type demotion operation.  We also check that S3 and S4
   have only one use.  */
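
/* For example (an illustrative sketch):

     unsigned char in[256], out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (in[i] << 1) | 1;

   The shift and ior are carried out in int only because of C integer
   promotion; since the result is demoted back to unsigned char, both
   operations fit a narrower type and narrower vector operations can be
   used.  */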
static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it w<< CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with
    S4 in STMTS.  When an intermediate type is used and a pattern statement
    is created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
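
/* For example (an illustrative sketch):

     short a[256];
     int res[256];
     for (int i = 0; i < 256; i++)
       res[i] = a[i] << 10;

   a[i] is promoted to int before the shift; S2 and S3 are replaced by
   WIDEN_LSHIFT_EXPR <a[i], 10>.  */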
static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

  where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
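
/* For example (an illustrative sketch): a rotation that earlier passes have
   already folded to a single LROTATE_EXPR, such as

     unsigned int
     rotl (unsigned int x, unsigned int n)
     {
       return (x << n) | (x >> ((-n) & 31));
     }

   is re-expanded here into the S1..S4 shift/mask/or sequence when the target
   has vector shifts but no vector rotate.  */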
static gimple *
vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, vinfo, &def_stmt, &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && is_a <loop_vec_info> (vinfo))
    {
      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1976 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1977 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1978 append_pattern_def_seq (stmt_vinfo, def_stmt);
1982 var1 = vect_recog_temp_ssa_var (type, NULL);
1983 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1984 ? LSHIFT_EXPR : RSHIFT_EXPR,
1985 oprnd0, def);
1986 append_pattern_def_seq (stmt_vinfo, def_stmt);
1988 var2 = vect_recog_temp_ssa_var (type, NULL);
1989 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1990 ? RSHIFT_EXPR : LSHIFT_EXPR,
1991 oprnd0, def2);
1992 append_pattern_def_seq (stmt_vinfo, def_stmt);
1994 /* Pattern detected. */
1995 if (dump_enabled_p ())
1996 dump_printf_loc (MSG_NOTE, vect_location,
1997 "vect_recog_rotate_pattern: detected:\n");
1999 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2000 var = vect_recog_temp_ssa_var (type, NULL);
2001 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2003 if (dump_enabled_p ())
2004 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2006 stmts->safe_push (last_stmt);
2007 return pattern_stmt;
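/* A minimal scalar sketch of the S1..S0 sequence built above, assuming
   a 32-bit unsigned element (B = 32), a left rotate, and a rotate
   count below B; the function name is hypothetical and the code is
   only illustrative, not part of the vectorizer.  */
static unsigned int
rotate_left_lowered (unsigned int b_t, unsigned int c_t)
{
  unsigned int d_t = -c_t;		/* S1 */
  unsigned int e_t = d_t & (32 - 1);	/* S2: d_t & (B - 1) */
  unsigned int f_t = b_t << c_t;	/* S3 */
  unsigned int g_t = b_t >> e_t;	/* S4 */
  return f_t | g_t;			/* S0: a_t = b_t r<< c_t */
}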
2010 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2011 vectorized:
2013 type a_t;
2014 TYPE b_T, res_T;
2016 S1 a_t = ;
2017 S2 b_T = ;
2018 S3 res_T = b_T op a_t;
2020 where type 'TYPE' is a type with a different size than 'type',
2021 and op is <<, >> or rotate.
2023 Also detect cases:
2025 type a_t;
2026 TYPE b_T, c_T, res_T;
2028 S0 c_T = ;
2029 S1 a_t = (type) c_T;
2030 S2 b_T = ;
2031 S3 res_T = b_T op a_t;
2033 Input/Output:
2035 * STMTS: Contains a stmt from which the pattern search begins,
2036 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2037 with a shift/rotate which has same type on both operands, in the
2038 second case just b_T op c_T, in the first case with added cast
2039 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2041 Output:
2043 * TYPE_IN: The type of the input arguments to the pattern.
2045 * TYPE_OUT: The type of the output of this pattern.
2047 * Return value: A new stmt that will be used to replace the shift/rotate
2048 S3 stmt. */
2050 static gimple *
2051 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2052 tree *type_in, tree *type_out)
2054 gimple *last_stmt = stmts->pop ();
2055 tree oprnd0, oprnd1, lhs, var;
2056 gimple *pattern_stmt, *def_stmt;
2057 enum tree_code rhs_code;
2058 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2059 vec_info *vinfo = stmt_vinfo->vinfo;
2060 enum vect_def_type dt;
2061 tree def;
2063 if (!is_gimple_assign (last_stmt))
2064 return NULL;
2066 rhs_code = gimple_assign_rhs_code (last_stmt);
2067 switch (rhs_code)
2069 case LSHIFT_EXPR:
2070 case RSHIFT_EXPR:
2071 case LROTATE_EXPR:
2072 case RROTATE_EXPR:
2073 break;
2074 default:
2075 return NULL;
2078 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2079 return NULL;
2081 lhs = gimple_assign_lhs (last_stmt);
2082 oprnd0 = gimple_assign_rhs1 (last_stmt);
2083 oprnd1 = gimple_assign_rhs2 (last_stmt);
2084 if (TREE_CODE (oprnd0) != SSA_NAME
2085 || TREE_CODE (oprnd1) != SSA_NAME
2086 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2087 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2088 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2089 || TYPE_PRECISION (TREE_TYPE (lhs))
2090 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2091 return NULL;
2093 if (!vect_is_simple_use (oprnd1, last_stmt, vinfo, &def_stmt,
2094 &def, &dt))
2095 return NULL;
2097 if (dt != vect_internal_def)
2098 return NULL;
2100 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2101 *type_out = *type_in;
2102 if (*type_in == NULL_TREE)
2103 return NULL;
2105 def = NULL_TREE;
2106 if (gimple_assign_cast_p (def_stmt))
2108 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2109 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2110 && TYPE_PRECISION (TREE_TYPE (rhs1))
2111 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2112 def = rhs1;
2115 if (def == NULL_TREE)
2117 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2118 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2119 new_pattern_def_seq (stmt_vinfo, def_stmt);
2122 /* Pattern detected. */
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_NOTE, vect_location,
2125 "vect_recog_vector_vector_shift_pattern: detected:\n");
2127 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2128 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2129 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2131 if (dump_enabled_p ())
2132 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2134 stmts->safe_push (last_stmt);
2135 return pattern_stmt;
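/* A minimal scalar sketch of the rewrite above, assuming TYPE is a
   64-bit and 'type' an 8-bit unsigned type, with a shift count below
   the width; names are hypothetical.  The narrow shift amount is
   widened with a NOP cast so both shift operands share a mode,
   mirroring what the pattern def seq does.  */
static unsigned long long
vector_vector_shift_lowered (unsigned long long b_T, unsigned char a_t)
{
  unsigned long long def = (unsigned long long) a_t;	/* NOP_EXPR cast */
  return b_T << def;					/* S3 rewritten */
}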
2138 /* Detect multiplication by a constant that is a positive or negative power
2139 of 2, and convert it to a shift pattern.
2141 Mult with a constant that is a positive power of two.
2142 type a_t;
2143 type b_t;
2144 S1: b_t = a_t * n
2148 Mult with a constant that is a negative power of two.
2149 S2: b_t = a_t * -n
2151 Input/Output:
2153 STMTS: Contains a stmt from which the pattern search begins,
2154 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2155 the constant operand is a power of 2.
2156 type a_t, b_t;
2157 S1': b_t = a_t << log2 (n)
2159 Convert the mult operation to LSHIFT followed by a NEGATE
2160 if the constant operand is a negative power of 2.
2161 type a_t, b_t, res_T;
2162 S2': b_t = a_t << log2 (n)
2163 S3': res_T = - (b_t)
2165 Output:
2167 * TYPE_IN: The type of the input arguments to the pattern.
2169 * TYPE_OUT: The type of the output of this pattern.
2171 * Return value: A new stmt that will be used to replace the multiplication
2172 S1 or S2 stmt. */
2174 static gimple *
2175 vect_recog_mult_pattern (vec<gimple *> *stmts,
2176 tree *type_in, tree *type_out)
2178 gimple *last_stmt = stmts->pop ();
2179 tree oprnd0, oprnd1, vectype, itype;
2180 gimple *pattern_stmt, *def_stmt;
2181 optab optab;
2182 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2183 int power2_val, power2_neg_val;
2184 tree shift;
2186 if (!is_gimple_assign (last_stmt))
2187 return NULL;
2189 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2190 return NULL;
2192 oprnd0 = gimple_assign_rhs1 (last_stmt);
2193 oprnd1 = gimple_assign_rhs2 (last_stmt);
2194 itype = TREE_TYPE (oprnd0);
2196 if (TREE_CODE (oprnd0) != SSA_NAME
2197 || TREE_CODE (oprnd1) != INTEGER_CST
2198 || !INTEGRAL_TYPE_P (itype)
2199 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2200 return NULL;
2202 vectype = get_vectype_for_scalar_type (itype);
2203 if (vectype == NULL_TREE)
2204 return NULL;
2206 /* If the target can handle vectorized multiplication natively,
2207 don't attempt to optimize this. */
2208 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2209 if (optab != unknown_optab)
2211 machine_mode vec_mode = TYPE_MODE (vectype);
2212 int icode = (int) optab_handler (optab, vec_mode);
2213 if (icode != CODE_FOR_nothing)
2214 return NULL;
2217 /* If target cannot handle vector left shift then we cannot
2218 optimize and bail out. */
2219 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2220 if (!optab
2221 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2222 return NULL;
2224 power2_val = wi::exact_log2 (oprnd1);
2225 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2227 /* Handle constant operands that are positive or negative powers of 2. */
2228 if (power2_val != -1)
2230 shift = build_int_cst (itype, power2_val);
2231 pattern_stmt
2232 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2233 LSHIFT_EXPR, oprnd0, shift);
2235 else if (power2_neg_val != -1)
2237 /* If the target cannot handle vector NEGATE then we cannot
2238 do the optimization. */
2239 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2240 if (!optab
2241 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2242 return NULL;
2244 shift = build_int_cst (itype, power2_neg_val);
2245 def_stmt
2246 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2247 LSHIFT_EXPR, oprnd0, shift);
2248 new_pattern_def_seq (stmt_vinfo, def_stmt);
2249 pattern_stmt
2250 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2251 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2253 else
2254 return NULL;
2256 /* Pattern detected. */
2257 if (dump_enabled_p ())
2258 dump_printf_loc (MSG_NOTE, vect_location,
2259 "vect_recog_mult_pattern: detected:\n");
2261 if (dump_enabled_p ())
2262 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2263 pattern_stmt, 0);
2265 stmts->safe_push (last_stmt);
2266 *type_in = vectype;
2267 *type_out = vectype;
2269 return pattern_stmt;
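/* A minimal scalar sketch of S1' and S2'/S3' above with n = 8
   (log2 (n) = 3); the function names are hypothetical and signed
   left shifts are assumed to wrap as they do on GCC targets.  */
static int
mult_pow2_lowered (int a_t)
{
  return a_t << 3;	/* S1': a_t * 8 */
}

static int
mult_neg_pow2_lowered (int a_t)
{
  int b_t = a_t << 3;	/* S2': a_t * 8 */
  return -b_t;		/* S3': negate, giving a_t * -8 */
}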
2272 /* Detect a signed division by a constant that wouldn't be
2273 otherwise vectorized:
2275 type a_t, b_t;
2277 S1 a_t = b_t / N;
2279 where type 'type' is an integral type and N is a constant.
2281 Similarly handle modulo by a constant:
2283 S4 a_t = b_t % N;
2285 Input/Output:
2287 * STMTS: Contains a stmt from which the pattern search begins,
2288 i.e. the division stmt. S1 is replaced, if N is a power-of-two
2289 constant and the type is signed, by:
2290 S3 y_t = b_t < 0 ? N - 1 : 0;
2291 S2 x_t = b_t + y_t;
2292 S1' a_t = x_t >> log2 (N);
2294 S4 is replaced, if N is a power-of-two constant and
2295 the type is signed, by (where *_T temporaries have unsigned type):
2296 S9 y_T = b_t < 0 ? -1U : 0U;
2297 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2298 S7 z_t = (type) z_T;
2299 S6 w_t = b_t + z_t;
2300 S5 x_t = w_t & (N - 1);
2301 S4' a_t = x_t - z_t;
2303 Output:
2305 * TYPE_IN: The type of the input arguments to the pattern.
2307 * TYPE_OUT: The type of the output of this pattern.
2309 * Return value: A new stmt that will be used to replace the division
2310 S1 or modulo S4 stmt. */
2312 static gimple *
2313 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2314 tree *type_in, tree *type_out)
2316 gimple *last_stmt = stmts->pop ();
2317 tree oprnd0, oprnd1, vectype, itype, cond;
2318 gimple *pattern_stmt, *def_stmt;
2319 enum tree_code rhs_code;
2320 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2321 vec_info *vinfo = stmt_vinfo->vinfo;
2322 optab optab;
2323 tree q;
2324 int dummy_int, prec;
2325 stmt_vec_info def_stmt_vinfo;
2327 if (!is_gimple_assign (last_stmt))
2328 return NULL;
2330 rhs_code = gimple_assign_rhs_code (last_stmt);
2331 switch (rhs_code)
2333 case TRUNC_DIV_EXPR:
2334 case TRUNC_MOD_EXPR:
2335 break;
2336 default:
2337 return NULL;
2340 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2341 return NULL;
2343 oprnd0 = gimple_assign_rhs1 (last_stmt);
2344 oprnd1 = gimple_assign_rhs2 (last_stmt);
2345 itype = TREE_TYPE (oprnd0);
2346 if (TREE_CODE (oprnd0) != SSA_NAME
2347 || TREE_CODE (oprnd1) != INTEGER_CST
2348 || TREE_CODE (itype) != INTEGER_TYPE
2349 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2350 return NULL;
2352 vectype = get_vectype_for_scalar_type (itype);
2353 if (vectype == NULL_TREE)
2354 return NULL;
2356 /* If the target can handle vectorized division or modulo natively,
2357 don't attempt to optimize this. */
2358 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2359 if (optab != unknown_optab)
2361 machine_mode vec_mode = TYPE_MODE (vectype);
2362 int icode = (int) optab_handler (optab, vec_mode);
2363 if (icode != CODE_FOR_nothing)
2364 return NULL;
2367 prec = TYPE_PRECISION (itype);
2368 if (integer_pow2p (oprnd1))
2370 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2371 return NULL;
2373 /* Pattern detected. */
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_NOTE, vect_location,
2376 "vect_recog_divmod_pattern: detected:\n");
2378 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2379 build_int_cst (itype, 0));
2380 if (rhs_code == TRUNC_DIV_EXPR)
2382 tree var = vect_recog_temp_ssa_var (itype, NULL);
2383 tree shift;
2384 def_stmt
2385 = gimple_build_assign (var, COND_EXPR, cond,
2386 fold_build2 (MINUS_EXPR, itype, oprnd1,
2387 build_int_cst (itype, 1)),
2388 build_int_cst (itype, 0));
2389 new_pattern_def_seq (stmt_vinfo, def_stmt);
2390 var = vect_recog_temp_ssa_var (itype, NULL);
2391 def_stmt
2392 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2393 gimple_assign_lhs (def_stmt));
2394 append_pattern_def_seq (stmt_vinfo, def_stmt);
2396 shift = build_int_cst (itype, tree_log2 (oprnd1));
2397 pattern_stmt
2398 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2399 RSHIFT_EXPR, var, shift);
2401 else
2403 tree signmask;
2404 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2405 if (compare_tree_int (oprnd1, 2) == 0)
2407 signmask = vect_recog_temp_ssa_var (itype, NULL);
2408 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2409 build_int_cst (itype, 1),
2410 build_int_cst (itype, 0));
2411 append_pattern_def_seq (stmt_vinfo, def_stmt);
2413 else
2415 tree utype
2416 = build_nonstandard_integer_type (prec, 1);
2417 tree vecutype = get_vectype_for_scalar_type (utype);
2418 tree shift
2419 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2420 - tree_log2 (oprnd1));
2421 tree var = vect_recog_temp_ssa_var (utype, NULL);
2423 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2424 build_int_cst (utype, -1),
2425 build_int_cst (utype, 0));
2426 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2427 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2428 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2429 append_pattern_def_seq (stmt_vinfo, def_stmt);
2430 var = vect_recog_temp_ssa_var (utype, NULL);
2431 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2432 gimple_assign_lhs (def_stmt),
2433 shift);
2434 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2435 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2436 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2437 append_pattern_def_seq (stmt_vinfo, def_stmt);
2438 signmask = vect_recog_temp_ssa_var (itype, NULL);
2439 def_stmt
2440 = gimple_build_assign (signmask, NOP_EXPR, var);
2441 append_pattern_def_seq (stmt_vinfo, def_stmt);
2443 def_stmt
2444 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2445 PLUS_EXPR, oprnd0, signmask);
2446 append_pattern_def_seq (stmt_vinfo, def_stmt);
2447 def_stmt
2448 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2449 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2450 fold_build2 (MINUS_EXPR, itype, oprnd1,
2451 build_int_cst (itype, 1)));
2452 append_pattern_def_seq (stmt_vinfo, def_stmt);
2454 pattern_stmt
2455 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2456 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2457 signmask);
2460 if (dump_enabled_p ())
2461 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2464 stmts->safe_push (last_stmt);
2466 *type_in = vectype;
2467 *type_out = vectype;
2468 return pattern_stmt;
2471 if (prec > HOST_BITS_PER_WIDE_INT
2472 || integer_zerop (oprnd1))
2473 return NULL;
2475 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2476 return NULL;
2478 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2480 if (TYPE_UNSIGNED (itype))
2482 unsigned HOST_WIDE_INT mh, ml;
2483 int pre_shift, post_shift;
2484 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2485 & GET_MODE_MASK (TYPE_MODE (itype)));
2486 tree t1, t2, t3, t4;
2488 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2489 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2490 return NULL;
2492 /* Find a suitable multiplier and right shift count
2493 instead of multiplying with D. */
2494 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2496 /* If the suggested multiplier is more than SIZE bits, we can do better
2497 for even divisors, using an initial right shift. */
2498 if (mh != 0 && (d & 1) == 0)
2500 pre_shift = floor_log2 (d & -d);
2501 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2502 &ml, &post_shift, &dummy_int);
2503 gcc_assert (!mh);
2505 else
2506 pre_shift = 0;
2508 if (mh != 0)
2510 if (post_shift - 1 >= prec)
2511 return NULL;
2513 /* t1 = oprnd0 h* ml;
2514 t2 = oprnd0 - t1;
2515 t3 = t2 >> 1;
2516 t4 = t1 + t3;
2517 q = t4 >> (post_shift - 1); */
2518 t1 = vect_recog_temp_ssa_var (itype, NULL);
2519 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2520 build_int_cst (itype, ml));
2521 append_pattern_def_seq (stmt_vinfo, def_stmt);
2523 t2 = vect_recog_temp_ssa_var (itype, NULL);
2524 def_stmt
2525 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2526 append_pattern_def_seq (stmt_vinfo, def_stmt);
2528 t3 = vect_recog_temp_ssa_var (itype, NULL);
2529 def_stmt
2530 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2531 append_pattern_def_seq (stmt_vinfo, def_stmt);
2533 t4 = vect_recog_temp_ssa_var (itype, NULL);
2534 def_stmt
2535 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2537 if (post_shift != 1)
2539 append_pattern_def_seq (stmt_vinfo, def_stmt);
2541 q = vect_recog_temp_ssa_var (itype, NULL);
2542 pattern_stmt
2543 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2544 build_int_cst (itype, post_shift - 1));
2546 else
2548 q = t4;
2549 pattern_stmt = def_stmt;
2552 else
2554 if (pre_shift >= prec || post_shift >= prec)
2555 return NULL;
2557 /* t1 = oprnd0 >> pre_shift;
2558 t2 = t1 h* ml;
2559 q = t2 >> post_shift; */
2560 if (pre_shift)
2562 t1 = vect_recog_temp_ssa_var (itype, NULL);
2563 def_stmt
2564 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2565 build_int_cst (itype, pre_shift));
2566 append_pattern_def_seq (stmt_vinfo, def_stmt);
2568 else
2569 t1 = oprnd0;
2571 t2 = vect_recog_temp_ssa_var (itype, NULL);
2572 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2573 build_int_cst (itype, ml));
2575 if (post_shift)
2577 append_pattern_def_seq (stmt_vinfo, def_stmt);
2579 q = vect_recog_temp_ssa_var (itype, NULL);
2580 def_stmt
2581 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2582 build_int_cst (itype, post_shift));
2584 else
2585 q = t2;
2587 pattern_stmt = def_stmt;
2590 else
2592 unsigned HOST_WIDE_INT ml;
2593 int post_shift;
2594 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2595 unsigned HOST_WIDE_INT abs_d;
2596 bool add = false;
2597 tree t1, t2, t3, t4;
2599 /* Give up for -1. */
2600 if (d == -1)
2601 return NULL;
2603 /* Since d might be INT_MIN, we have to cast to
2604 unsigned HOST_WIDE_INT before negating to avoid
2605 undefined signed overflow. */
2606 abs_d = (d >= 0
2607 ? (unsigned HOST_WIDE_INT) d
2608 : - (unsigned HOST_WIDE_INT) d);
2610 /* n rem d = n rem -d */
2611 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2613 d = abs_d;
2614 oprnd1 = build_int_cst (itype, abs_d);
2616 else if (HOST_BITS_PER_WIDE_INT >= prec
2617 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2618 /* This case is not handled correctly below. */
2619 return NULL;
2621 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2622 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2624 add = true;
2625 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2627 if (post_shift >= prec)
2628 return NULL;
2630 /* t1 = oprnd0 h* ml; */
2631 t1 = vect_recog_temp_ssa_var (itype, NULL);
2632 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2633 build_int_cst (itype, ml));
2635 if (add)
2637 /* t2 = t1 + oprnd0; */
2638 append_pattern_def_seq (stmt_vinfo, def_stmt);
2639 t2 = vect_recog_temp_ssa_var (itype, NULL);
2640 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2642 else
2643 t2 = t1;
2645 if (post_shift)
2647 /* t3 = t2 >> post_shift; */
2648 append_pattern_def_seq (stmt_vinfo, def_stmt);
2649 t3 = vect_recog_temp_ssa_var (itype, NULL);
2650 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2651 build_int_cst (itype, post_shift));
2653 else
2654 t3 = t2;
2656 wide_int oprnd0_min, oprnd0_max;
2657 int msb = 1;
2658 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2660 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2661 msb = 0;
2662 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2663 msb = -1;
2666 if (msb == 0 && d >= 0)
2668 /* q = t3; */
2669 q = t3;
2670 pattern_stmt = def_stmt;
2672 else
2674 /* t4 = oprnd0 >> (prec - 1);
2675 or if we know from VRP that oprnd0 >= 0
2676 t4 = 0;
2677 or if we know from VRP that oprnd0 < 0
2678 t4 = -1; */
2679 append_pattern_def_seq (stmt_vinfo, def_stmt);
2680 t4 = vect_recog_temp_ssa_var (itype, NULL);
2681 if (msb != 1)
2682 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2683 build_int_cst (itype, msb));
2684 else
2685 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2686 build_int_cst (itype, prec - 1));
2687 append_pattern_def_seq (stmt_vinfo, def_stmt);
2689 /* q = t3 - t4; or q = t4 - t3; */
2690 q = vect_recog_temp_ssa_var (itype, NULL);
2691 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2692 d < 0 ? t3 : t4);
2696 if (rhs_code == TRUNC_MOD_EXPR)
2698 tree r, t1;
2700 /* We divided. Now finish by:
2701 t1 = q * oprnd1;
2702 r = oprnd0 - t1; */
2703 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2705 t1 = vect_recog_temp_ssa_var (itype, NULL);
2706 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2707 append_pattern_def_seq (stmt_vinfo, def_stmt);
2709 r = vect_recog_temp_ssa_var (itype, NULL);
2710 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2713 /* Pattern detected. */
2714 if (dump_enabled_p ())
2716 dump_printf_loc (MSG_NOTE, vect_location,
2717 "vect_recog_divmod_pattern: detected: ");
2718 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2719 dump_printf (MSG_NOTE, "\n");
2722 stmts->safe_push (last_stmt);
2724 *type_in = vectype;
2725 *type_out = vectype;
2726 return pattern_stmt;
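/* Minimal scalar sketches of the lowerings above, added for
   illustration only; the function names are hypothetical, the
   multiplier and shift values are worked examples of what
   choose_multiplier returns for prec = 32, and an arithmetic right
   shift of signed values is assumed, as on GCC targets.  */

static int
sdiv_pow2_lowered (int b_t)		/* b_t / 4, power-of-two path */
{
  int y_t = b_t < 0 ? 4 - 1 : 0;	/* S3 */
  int x_t = b_t + y_t;			/* S2 */
  return x_t >> 2;			/* S1': shift by log2 (4) */
}

static int
smod_pow2_lowered (int b_t)		/* b_t % 4 */
{
  unsigned int y_T = b_t < 0 ? -1U : 0U;	/* S9 */
  unsigned int z_T = y_T >> (32 - 2);		/* S8 */
  int z_t = (int) z_T;				/* S7 */
  int w_t = b_t + z_t;				/* S6 */
  int x_t = w_t & (4 - 1);			/* S5 */
  return x_t - z_t;				/* S4' */
}

static unsigned int
udiv3_lowered (unsigned int x)	/* x / 3: ml = 0xAAAAAAAB, post_shift = 1 */
{
  unsigned int t2				/* t2 = x h* ml */
    = (unsigned int) (((unsigned long long) x * 0xAAAAAAABu) >> 32);
  return t2 >> 1;				/* q = t2 >> post_shift */
}

static int
sdiv3_lowered (int x)		/* x / 3: ml = 0x55555556, post_shift = 0 */
{
  int t3 = (int) (((long long) x * 0x55555556LL) >> 32);  /* x h* ml */
  int t4 = x >> 31;		/* 0 if x >= 0, -1 if x < 0 */
  return t3 - t4;		/* q = t3 - t4, since d > 0 */
}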
2729 /* Function vect_recog_mixed_size_cond_pattern
2731 Try to find the following pattern:
2733 type x_t, y_t;
2734 TYPE a_T, b_T, c_T;
2735 loop:
2736 S1 a_T = x_t CMP y_t ? b_T : c_T;
2738 where type 'TYPE' is an integral type which has a different size
2739 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2740 than 'type', the constants need to fit into an integer type
2741 with the same width as 'type') or results of conversion from 'type'.
2743 Input:
2745 * LAST_STMT: A stmt from which the pattern search begins.
2747 Output:
2749 * TYPE_IN: The type of the input arguments to the pattern.
2751 * TYPE_OUT: The type of the output of this pattern.
2753 * Return value: A new stmt that will be used to replace the pattern.
2754 Additionally a def_stmt is added.
2756 a_it = x_t CMP y_t ? b_it : c_it;
2757 a_T = (TYPE) a_it; */
2759 static gimple *
2760 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2761 tree *type_out)
2763 gimple *last_stmt = (*stmts)[0];
2764 tree cond_expr, then_clause, else_clause;
2765 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2766 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2767 gimple *pattern_stmt, *def_stmt;
2768 vec_info *vinfo = stmt_vinfo->vinfo;
2769 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2770 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2771 bool promotion;
2772 tree comp_scalar_type;
2774 if (!is_gimple_assign (last_stmt)
2775 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2776 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2777 return NULL;
2779 cond_expr = gimple_assign_rhs1 (last_stmt);
2780 then_clause = gimple_assign_rhs2 (last_stmt);
2781 else_clause = gimple_assign_rhs3 (last_stmt);
2783 if (!COMPARISON_CLASS_P (cond_expr))
2784 return NULL;
2786 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2787 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2788 if (comp_vectype == NULL_TREE)
2789 return NULL;
2791 type = gimple_expr_type (last_stmt);
2792 if (types_compatible_p (type, comp_scalar_type)
2793 || ((TREE_CODE (then_clause) != INTEGER_CST
2794 || TREE_CODE (else_clause) != INTEGER_CST)
2795 && !INTEGRAL_TYPE_P (comp_scalar_type))
2796 || !INTEGRAL_TYPE_P (type))
2797 return NULL;
2799 if ((TREE_CODE (then_clause) != INTEGER_CST
2800 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2801 &def_stmt0, &promotion))
2802 || (TREE_CODE (else_clause) != INTEGER_CST
2803 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2804 &def_stmt1, &promotion)))
2805 return NULL;
2807 if (orig_type0 && orig_type1
2808 && !types_compatible_p (orig_type0, orig_type1))
2809 return NULL;
2811 if (orig_type0)
2813 if (!types_compatible_p (orig_type0, comp_scalar_type))
2814 return NULL;
2815 then_clause = gimple_assign_rhs1 (def_stmt0);
2816 itype = orig_type0;
2819 if (orig_type1)
2821 if (!types_compatible_p (orig_type1, comp_scalar_type))
2822 return NULL;
2823 else_clause = gimple_assign_rhs1 (def_stmt1);
2824 itype = orig_type1;
2828 HOST_WIDE_INT cmp_mode_size
2829 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2831 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2832 return NULL;
2834 vectype = get_vectype_for_scalar_type (type);
2835 if (vectype == NULL_TREE)
2836 return NULL;
2838 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2839 return NULL;
2841 if (itype == NULL_TREE)
2842 itype = build_nonstandard_integer_type (cmp_mode_size,
2843 TYPE_UNSIGNED (type));
2845 if (itype == NULL_TREE
2846 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2847 return NULL;
2849 vecitype = get_vectype_for_scalar_type (itype);
2850 if (vecitype == NULL_TREE)
2851 return NULL;
2853 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2854 return NULL;
2856 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2858 if ((TREE_CODE (then_clause) == INTEGER_CST
2859 && !int_fits_type_p (then_clause, itype))
2860 || (TREE_CODE (else_clause) == INTEGER_CST
2861 && !int_fits_type_p (else_clause, itype)))
2862 return NULL;
2865 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2866 COND_EXPR, unshare_expr (cond_expr),
2867 fold_convert (itype, then_clause),
2868 fold_convert (itype, else_clause));
2869 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2870 NOP_EXPR, gimple_assign_lhs (def_stmt));
2872 new_pattern_def_seq (stmt_vinfo, def_stmt);
2873 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2874 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2875 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2876 *type_in = vecitype;
2877 *type_out = vectype;
2879 if (dump_enabled_p ())
2880 dump_printf_loc (MSG_NOTE, vect_location,
2881 "vect_recog_mixed_size_cond_pattern: detected:\n");
2883 return pattern_stmt;
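/* A minimal scalar sketch of the rewrite above, assuming 16-bit
   comparison operands, a 64-bit result type, and constant clauses;
   the function name and the constants are hypothetical.  */
static long long
mixed_size_cond_lowered (short x_t, short y_t)
{
  short a_it = x_t < y_t ? 17 : 5;	/* COND_EXPR carried out in itype */
  return (long long) a_it;		/* NOP_EXPR back to TYPE */
}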
2887 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2888 true if bool VAR can be optimized that way. */
2890 static bool
2891 check_bool_pattern (tree var, vec_info *vinfo)
2893 gimple *def_stmt;
2894 enum vect_def_type dt;
2895 tree def, rhs1;
2896 enum tree_code rhs_code;
2898 if (!vect_is_simple_use (var, NULL, vinfo, &def_stmt, &def,
2899 &dt))
2900 return false;
2902 if (dt != vect_internal_def)
2903 return false;
2905 if (!is_gimple_assign (def_stmt))
2906 return false;
2908 if (!has_single_use (def))
2909 return false;
2911 rhs1 = gimple_assign_rhs1 (def_stmt);
2912 rhs_code = gimple_assign_rhs_code (def_stmt);
2913 switch (rhs_code)
2915 case SSA_NAME:
2916 return check_bool_pattern (rhs1, vinfo);
2918 CASE_CONVERT:
2919 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2920 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2921 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2922 return false;
2923 return check_bool_pattern (rhs1, vinfo);
2925 case BIT_NOT_EXPR:
2926 return check_bool_pattern (rhs1, vinfo);
2928 case BIT_AND_EXPR:
2929 case BIT_IOR_EXPR:
2930 case BIT_XOR_EXPR:
2931 if (!check_bool_pattern (rhs1, vinfo))
2932 return false;
2933 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2935 default:
2936 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2938 tree vecitype, comp_vectype;
2940 /* If the comparison can throw, then is_gimple_condexpr will be
2941 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2942 if (stmt_could_throw_p (def_stmt))
2943 return false;
2945 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2946 if (comp_vectype == NULL_TREE)
2947 return false;
2949 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2951 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2952 tree itype
2953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2954 vecitype = get_vectype_for_scalar_type (itype);
2955 if (vecitype == NULL_TREE)
2956 return false;
2958 else
2959 vecitype = comp_vectype;
2960 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2962 return false;
2967 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2968 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2969 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2971 static tree
2972 adjust_bool_pattern_cast (tree type, tree var)
2974 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2975 gimple *cast_stmt, *pattern_stmt;
2977 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2978 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2979 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2980 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2981 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2982 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2983 return gimple_assign_lhs (cast_stmt);
2987 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2988 recursively. VAR is an SSA_NAME that should be transformed from bool
2989 to a wider integer type, OUT_TYPE is the desired final integer type of
2990 the whole pattern, TRUEVAL should be NULL unless optimizing
2991 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2992 in the then_clause, STMTS is where statements with added pattern stmts
2993 should be pushed to. */
2995 static tree
2996 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2997 vec<gimple *> *stmts)
2999 gimple *stmt = SSA_NAME_DEF_STMT (var);
3000 enum tree_code rhs_code, def_rhs_code;
3001 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3002 location_t loc;
3003 gimple *pattern_stmt, *def_stmt;
3005 rhs1 = gimple_assign_rhs1 (stmt);
3006 rhs2 = gimple_assign_rhs2 (stmt);
3007 rhs_code = gimple_assign_rhs_code (stmt);
3008 loc = gimple_location (stmt);
3009 switch (rhs_code)
3011 case SSA_NAME:
3012 CASE_CONVERT:
3013 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3014 itype = TREE_TYPE (irhs1);
3015 pattern_stmt
3016 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3017 SSA_NAME, irhs1);
3018 break;
3020 case BIT_NOT_EXPR:
3021 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3022 itype = TREE_TYPE (irhs1);
3023 pattern_stmt
3024 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3025 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3026 break;
3028 case BIT_AND_EXPR:
3029 /* Try to optimize x = y & (a < b ? 1 : 0); into
3030 x = (a < b ? y : 0);
3032 E.g. for:
3033 bool a_b, b_b, c_b;
3034 TYPE d_T;
3036 S1 a_b = x1 CMP1 y1;
3037 S2 b_b = x2 CMP2 y2;
3038 S3 c_b = a_b & b_b;
3039 S4 d_T = (TYPE) c_b;
3041 we would normally emit:
3043 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3044 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3045 S3' c_T = a_T & b_T;
3046 S4' d_T = c_T;
3048 but we can save one stmt by using the
3049 result of one of the COND_EXPRs in the other COND_EXPR and leave
3050 BIT_AND_EXPR stmt out:
3052 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3053 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3054 S4' d_T = c_T;
3056 At least when VEC_COND_EXPR is implemented using masks,
3057 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3058 computes the comparison mask and ANDs it, in one case with an
3059 all-ones vector, in the other case with a vector register.
3060 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3061 often more expensive. */
3062 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3063 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3064 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3066 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3067 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3068 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3069 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3071 gimple *tstmt;
3072 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3073 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3074 tstmt = stmts->pop ();
3075 gcc_assert (tstmt == def_stmt);
3076 stmts->quick_push (stmt);
3077 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3078 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3079 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3080 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3081 return irhs2;
3083 else
3084 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3085 goto and_ior_xor;
3087 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3088 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3089 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3091 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3092 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3093 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3094 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3096 gimple *tstmt;
3097 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3098 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3099 tstmt = stmts->pop ();
3100 gcc_assert (tstmt == def_stmt);
3101 stmts->quick_push (stmt);
3102 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3103 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3104 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3105 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3106 return irhs1;
3108 else
3109 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3110 goto and_ior_xor;
3112 /* FALLTHRU */
3113 case BIT_IOR_EXPR:
3114 case BIT_XOR_EXPR:
3115 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3116 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3117 and_ior_xor:
3118 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3119 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3121 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3122 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3123 int out_prec = TYPE_PRECISION (out_type);
3124 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3125 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3126 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3127 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3128 else
3130 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3131 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3134 itype = TREE_TYPE (irhs1);
3135 pattern_stmt
3136 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3137 rhs_code, irhs1, irhs2);
3138 break;
3140 default:
3141 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3142 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3143 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3144 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3145 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3147 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3148 itype
3149 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3151 else
3152 itype = TREE_TYPE (rhs1);
3153 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3154 if (trueval == NULL_TREE)
3155 trueval = build_int_cst (itype, 1);
3156 else
3157 gcc_checking_assert (useless_type_conversion_p (itype,
3158 TREE_TYPE (trueval)));
3159 pattern_stmt
3160 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3161 COND_EXPR, cond_expr, trueval,
3162 build_int_cst (itype, 0));
3163 break;
3166 stmts->safe_push (stmt);
3167 gimple_set_location (pattern_stmt, loc);
3168 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3169 return gimple_assign_lhs (pattern_stmt);
3173 /* Function vect_recog_bool_pattern
3175 Try to find pattern like following:
3177 bool a_b, b_b, c_b, d_b, e_b;
3178 TYPE f_T;
3179 loop:
3180 S1 a_b = x1 CMP1 y1;
3181 S2 b_b = x2 CMP2 y2;
3182 S3 c_b = a_b & b_b;
3183 S4 d_b = x3 CMP3 y3;
3184 S5 e_b = c_b | d_b;
3185 S6 f_T = (TYPE) e_b;
3187 where type 'TYPE' is an integral type. Or a similar pattern
3188 ending in
3190 S6 f_Y = e_b ? r_Y : s_Y;
3192 as results from if-conversion of a complex condition.
3194 Input:
3196 * LAST_STMT: A stmt at the end from which the pattern
3197 search begins, i.e. cast of a bool to
3198 an integer type.
3200 Output:
3202 * TYPE_IN: The type of the input arguments to the pattern.
3204 * TYPE_OUT: The type of the output of this pattern.
3206 * Return value: A new stmt that will be used to replace the pattern.
3208 Assuming the size of TYPE is the same as the size of all comparisons
3209 (otherwise some casts would be added where needed), for the above
3210 sequence we create related pattern stmts:
3211 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3212 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3213 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3214 S5' e_T = c_T | d_T;
3215 S6' f_T = e_T;
3217 Instead of the above S3' we could emit:
3218 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3219 S3' c_T = a_T | b_T;
3220 but the above is more efficient. */
3222 static gimple *
3223 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3224 tree *type_out)
3226 gimple *last_stmt = stmts->pop ();
3227 enum tree_code rhs_code;
3228 tree var, lhs, rhs, vectype;
3229 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3230 vec_info *vinfo = stmt_vinfo->vinfo;
3231 gimple *pattern_stmt;
3233 if (!is_gimple_assign (last_stmt))
3234 return NULL;
3236 var = gimple_assign_rhs1 (last_stmt);
3237 lhs = gimple_assign_lhs (last_stmt);
3239 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3240 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3241 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3242 return NULL;
3244 rhs_code = gimple_assign_rhs_code (last_stmt);
3245 if (CONVERT_EXPR_CODE_P (rhs_code))
3247 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3248 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3249 return NULL;
3250 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3251 if (vectype == NULL_TREE)
3252 return NULL;
3254 if (!check_bool_pattern (var, vinfo))
3255 return NULL;
3257 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3258 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3259 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3260 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3261 else
3262 pattern_stmt
3263 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3264 *type_out = vectype;
3265 *type_in = vectype;
3266 stmts->safe_push (last_stmt);
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_NOTE, vect_location,
3269 "vect_recog_bool_pattern: detected:\n");
3271 return pattern_stmt;
3273 else if (rhs_code == COND_EXPR
3274 && TREE_CODE (var) == SSA_NAME)
3276 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3277 if (vectype == NULL_TREE)
3278 return NULL;
3280 /* Build a scalar type for the boolean result that when
3281 vectorized matches the vector type of the result in
3282 size and number of elements. */
3283 unsigned prec
3284 = wi::udiv_trunc (TYPE_SIZE (vectype),
3285 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3286 tree type
3287 = build_nonstandard_integer_type (prec,
3288 TYPE_UNSIGNED (TREE_TYPE (var)));
3289 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3290 return NULL;
3292 if (!check_bool_pattern (var, vinfo))
3293 return NULL;
3295 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3296 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3297 pattern_stmt
3298 = gimple_build_assign (lhs, COND_EXPR,
3299 build2 (NE_EXPR, boolean_type_node,
3300 rhs, build_int_cst (type, 0)),
3301 gimple_assign_rhs2 (last_stmt),
3302 gimple_assign_rhs3 (last_stmt));
3303 *type_out = vectype;
3304 *type_in = vectype;
3305 stmts->safe_push (last_stmt);
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_NOTE, vect_location,
3308 "vect_recog_bool_pattern: detected:\n");
3310 return pattern_stmt;
3312 else if (rhs_code == SSA_NAME
3313 && STMT_VINFO_DATA_REF (stmt_vinfo))
3315 stmt_vec_info pattern_stmt_info;
3316 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3317 gcc_assert (vectype != NULL_TREE);
3318 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3319 return NULL;
3320 if (!check_bool_pattern (var, vinfo))
3321 return NULL;
3323 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3324 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3325 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3327 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3328 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3329 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3330 rhs = rhs2;
3332 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3333 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3334 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3335 STMT_VINFO_DATA_REF (pattern_stmt_info)
3336 = STMT_VINFO_DATA_REF (stmt_vinfo);
3337 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3338 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3339 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3340 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3341 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3342 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3343 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3344 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3345 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3346 *type_out = vectype;
3347 *type_in = vectype;
3348 stmts->safe_push (last_stmt);
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_NOTE, vect_location,
3351 "vect_recog_bool_pattern: detected:\n");
3352 return pattern_stmt;
3354 else
3355 return NULL;
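/* A minimal scalar sketch of S1'..S6' above, including the
   BIT_AND_EXPR trick where the a_T result feeds the second COND_EXPR
   directly; the function name is hypothetical and the code is only
   illustrative.  */
static int
bool_pattern_lowered (int x1, int y1, int x2, int y2, int x3, int y3)
{
  int a_T = x1 < y1 ? 1 : 0;	/* S1' */
  int c_T = x2 < y2 ? a_T : 0;	/* S3': a_b & b_b folded into the cond */
  int d_T = x3 < y3 ? 1 : 0;	/* S4' */
  int e_T = c_T | d_T;		/* S5' */
  return e_T;			/* S6' */
}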
3359 /* Mark statements that are involved in a pattern. */
3361 static inline void
3362 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
3363 tree pattern_vectype)
3365 stmt_vec_info pattern_stmt_info, def_stmt_info;
3366 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3367 vec_info *vinfo = orig_stmt_info->vinfo;
3368 gimple *def_stmt;
3370 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3371 if (pattern_stmt_info == NULL)
3373 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3374 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3376 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3378 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3379 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3380 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3381 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3382 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3383 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3384 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3385 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3386 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3388 gimple_stmt_iterator si;
3389 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3390 !gsi_end_p (si); gsi_next (&si))
3392 def_stmt = gsi_stmt (si);
3393 def_stmt_info = vinfo_for_stmt (def_stmt);
3394 if (def_stmt_info == NULL)
3396 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3397 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3399 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3400 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3401 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3402 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3403 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3408 /* Function vect_pattern_recog_1
3410 Input:
3411 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3412 computation pattern.
3413 STMT: A stmt from which the pattern search should start.
3415 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3416 expression that computes the same functionality and can be used to
3417 replace the sequence of stmts that are involved in the pattern.
3419 Output:
3420 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3421 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3422 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3423 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3424 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
3425 fits the available target pattern.
3427 This function also does some bookkeeping, as explained in the documentation
3428 for vect_pattern_recog. */
3430 static void
3431 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3432 gimple_stmt_iterator si,
3433 vec<gimple *> *stmts_to_replace)
3435 gimple *stmt = gsi_stmt (si), *pattern_stmt;
3436 stmt_vec_info stmt_info;
3437 loop_vec_info loop_vinfo;
3438 tree pattern_vectype;
3439 tree type_in, type_out;
3440 enum tree_code code;
3441 int i;
3442 gimple *next;
3444 stmts_to_replace->truncate (0);
3445 stmts_to_replace->quick_push (stmt);
3446 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3447 if (!pattern_stmt)
3448 return;
3450 stmt = stmts_to_replace->last ();
3451 stmt_info = vinfo_for_stmt (stmt);
3452 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3454 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3456 /* No need to check target support (already checked by the pattern
3457 recognition function). */
3458 pattern_vectype = type_out ? type_out : type_in;
3460 else
3462 machine_mode vec_mode;
3463 enum insn_code icode;
3464 optab optab;
3466 /* Check target support */
3467 type_in = get_vectype_for_scalar_type (type_in);
3468 if (!type_in)
3469 return;
3470 if (type_out)
3471 type_out = get_vectype_for_scalar_type (type_out);
3472 else
3473 type_out = type_in;
3474 if (!type_out)
3475 return;
3476 pattern_vectype = type_out;
3478 if (is_gimple_assign (pattern_stmt))
3479 code = gimple_assign_rhs_code (pattern_stmt);
3480 else
3482 gcc_assert (is_gimple_call (pattern_stmt));
3483 code = CALL_EXPR;
3486 optab = optab_for_tree_code (code, type_in, optab_default);
3487 vec_mode = TYPE_MODE (type_in);
3488 if (!optab
3489 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3490 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3491 return;
3494 /* Found a vectorizable pattern. */
3495 if (dump_enabled_p ())
3497 dump_printf_loc (MSG_NOTE, vect_location,
3498 "pattern recognized: ");
3499 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3502 /* Mark the stmts that are involved in the pattern. */
3503 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3505 /* Patterns cannot be vectorized using SLP, because they change the order of
3506 computation. */
3507 if (loop_vinfo)
3508 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3509 if (next == stmt)
3510 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3512 /* It is possible that additional pattern stmts are created and inserted in
3513 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3514 relevant statements. */
3515 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3516 && (unsigned) i < (stmts_to_replace->length () - 1);
3517 i++)
3519 stmt_info = vinfo_for_stmt (stmt);
3520 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3521 if (dump_enabled_p ())
3523 dump_printf_loc (MSG_NOTE, vect_location,
3524 "additional pattern stmt: ");
3525 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3528 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3533 /* Function vect_pattern_recog
3535 Input:
3536 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
3537 computation idioms.
3539 Output - for each computation idiom that is detected we create a new stmt
3540 that provides the same functionality and that can be vectorized. We
3541 also record some information in the stmt_vec_info of the relevant
3542 stmts, as explained below:
3544 At the entry to this function we have the following stmts, with the
3545 following initial value in the STMT_VINFO fields:
3547 stmt in_pattern_p related_stmt vec_stmt
3548 S1: a_i = .... - - -
3549 S2: a_2 = ..use(a_i).. - - -
3550 S3: a_1 = ..use(a_2).. - - -
3551 S4: a_0 = ..use(a_1).. - - -
3552 S5: ... = ..use(a_0).. - - -
3554 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3555 represented by a single stmt. We then:
3556 - create a new stmt S6 equivalent to the pattern (the stmt is not
3557 inserted into the code)
3558 - fill in the STMT_VINFO fields as follows:
3560 in_pattern_p related_stmt vec_stmt
3561 S1: a_i = .... - - -
3562 S2: a_2 = ..use(a_i).. - - -
3563 S3: a_1 = ..use(a_2).. - - -
3564 S4: a_0 = ..use(a_1).. true S6 -
3565 '---> S6: a_new = .... - S4 -
3566 S5: ... = ..use(a_0).. - - -
3568 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3569 to each other through the RELATED_STMT field).
3571 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3572 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3573 remain irrelevant unless used by stmts other than S4.
3575 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3576 (because they are marked as irrelevant). It will vectorize S6, and record
3577 a pointer to the new vector stmt VS6 from S6 (as usual).
3578 S4 will be skipped, and S5 will be vectorized as usual:
3580 in_pattern_p related_stmt vec_stmt
3581 S1: a_i = .... - - -
3582 S2: a_2 = ..use(a_i).. - - -
3583 S3: a_1 = ..use(a_2).. - - -
3584 > VS6: va_new = .... - - -
3585 S4: a_0 = ..use(a_1).. true S6 VS6
3586 '---> S6: a_new = .... - S4 VS6
3587 > VS5: ... = ..vuse(va_new).. - - -
3588 S5: ... = ..use(a_0).. - - -
3590 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3591 elsewhere), and we'll end up with:
3593 VS6: va_new = ....
3594 VS5: ... = ..vuse(va_new)..
3596 In case of more than one pattern statements, e.g., widen-mult with
3597 intermediate type:
3599 S1 a_t = ;
3600 S2 a_T = (TYPE) a_t;
3601 '--> S3: a_it = (interm_type) a_t;
3602 S4 prod_T = a_T * CONST;
3603 '--> S5: prod_T' = a_it w* CONST;
3605 there may be other users of a_T outside the pattern. In that case S2 will
3606 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3607 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3608 be recorded in S3. */
3610 void
3611 vect_pattern_recog (vec_info *vinfo)
3613 struct loop *loop;
3614 basic_block *bbs;
3615 unsigned int nbbs;
3616 gimple_stmt_iterator si;
3617 unsigned int i, j;
3618 vect_recog_func_ptr vect_recog_func;
3619 auto_vec<gimple *, 1> stmts_to_replace;
3620 gimple *stmt;
3622 if (dump_enabled_p ())
3623 dump_printf_loc (MSG_NOTE, vect_location,
3624 "=== vect_pattern_recog ===\n");
3626 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
3628 loop = LOOP_VINFO_LOOP (loop_vinfo);
3629 bbs = LOOP_VINFO_BBS (loop_vinfo);
3630 nbbs = loop->num_nodes;
3632 else
3634 bbs = &as_a <bb_vec_info> (vinfo)->bb;
3635 nbbs = 1;
3638 /* Scan through the loop stmts, applying the pattern recognition
3639 functions starting at each stmt visited: */
3640 for (i = 0; i < nbbs; i++)
3642 basic_block bb = bbs[i];
3643 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3645 if (is_a <bb_vec_info> (vinfo)
3646 && (stmt = gsi_stmt (si))
3647 && vinfo_for_stmt (stmt)
3648 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3649 continue;
3651 /* Scan over all generic vect_recog_xxx_pattern functions. */
3652 for (j = 0; j < NUM_PATTERNS; j++)
3654 vect_recog_func = vect_vect_recog_func_ptrs[j];
3655 vect_pattern_recog_1 (vect_recog_func, si,
3656 &stmts_to_replace);