/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
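
/* Illustrative example (an assumption for exposition, not taken from the
   original sources or testsuite): a C loop such as

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i] * b[i];

   has exactly this shape in GIMPLE, because the C integer promotions widen
   a[i] and b[i] to int before the multiply (S3/S4 above), so the whole
   sequence can be replaced by DOT_PROD_EXPR <a[i], b[i], sum_0>.  */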

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
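
/* Illustrative example (an assumption for exposition, not from the original
   sources): the classic SAD kernel from video encoding

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs (a[i] - b[i]);

   matches this shape, since a[i] and b[i] are promoted to (signed) int
   before the subtraction (S3/S4 above), and the sequence can be replaced
   by SAD_EXPR <a[i], b[i], sum_0>.  */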

static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DDAD = (TYPE2) DAD;
          sum_1 = DDAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign_with_ops
                        (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
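
/* Illustrative example (an assumption for exposition): with unsigned char
   a_t and an int-typed result,

     res_T = (int) a_t << 10;

   the shift amount 10 does not fit HALF_TYPE (char, 8 bits), but TYPE is
   4 times bigger than HALF_TYPE, so an intermediate 16-bit type works:

     a_it = (short) a_t;
     res_T' = a_it w<< 10;  */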

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
   		               vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	    	!= 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
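
/* Illustrative example (an assumption for exposition, not from the original
   sources): with 'short' inputs multiplied into an 'int' result,

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   the operands are promoted to int before the multiply (S3/S4 above), so
   the whole sequence can be replaced by WIDEN_MULT_EXPR <a[i], b[i]>.  */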

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
					       old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
        = gimple_build_assign_with_ops (NOP_EXPR,
                                        vect_recog_temp_ssa_var (type, NULL),
                                        gimple_assign_lhs (pattern_stmt),
                                        NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
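
/* Illustrative example (an assumption for exposition): both

     for (i = 0; i < N; i++) a[i] = pow (b[i], 2.0);   // -> b[i] * b[i]
     for (i = 0; i < N; i++) a[i] = pow (b[i], 0.5);   // -> sqrt (b[i])

   are rewritten here, since a vector pow is generally not available while
   vector multiplication and (on many targets) vector sqrt are.  */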

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
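
/* Illustrative example (an assumption for exposition): the byte-summing loop

     unsigned char a[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i];

   accumulates 8-bit elements into a 32-bit accumulator, so the promotion
   plus add (S2/S3 above) can be replaced by WIDEN_SUM_EXPR <a[i], sum_0>.  */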

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most
           8 if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We
           store this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1
   are constants.  Check if S3 and S4 can be done on a smaller type than
   'TYPE'; it can either be 'type' or some intermediate type.  For now, we
   expect S5 to be a type demotion operation.  We also check that S3 and S4
   have only one use.  */
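
/* Illustrative example (an assumption for exposition): in

     unsigned char a[N];
     unsigned short b[N];
     for (int i = 0; i < N; i++)
       b[i] = (a[i] << 2) | 3;

   the C promotions force the shift and the bitwise OR to be computed in
   int, even though 16 bits suffice for the final result.  This pattern
   lets both operations be done in a 16-bit type instead, avoiding the
   over-widening.  */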

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
               = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with
    S4 in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
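
/* Illustrative example (an assumption for exposition): in

     unsigned char a[N];
     unsigned short b[N];
     for (int i = 0; i < N; i++)
       b[i] = a[i] << 3;

   a[i] is widened before the shift and the result is demoted to short
   (S2/S3/S4 above), so promotion and shift combine into a single
   WIDEN_LSHIFT_EXPR <a[i], 3>.  */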

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
       				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
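
/* Illustrative example (an assumption for exposition): the portable rotate
   idiom, for a rotate count 0 < n < 32,

     unsigned int x[N];
     for (int i = 0; i < N; i++)
       x[i] = (x[i] << n) | (x[i] >> (32 - n));

   is typically folded to a single rotate (S0) earlier in compilation; on
   targets that have vector shifts but no vector rotate, this pattern
   re-expands S0 into the S1-S4/S0 sequence above so the loop can still be
   vectorized.  */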
1814 static gimple
1815 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1817 gimple last_stmt = stmts->pop ();
1818 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1819 gimple pattern_stmt, def_stmt;
1820 enum tree_code rhs_code;
1821 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1822 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1823 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1824 enum vect_def_type dt;
1825 optab optab1, optab2;
1826 edge ext_def = NULL;
1828 if (!is_gimple_assign (last_stmt))
1829 return NULL;
1831 rhs_code = gimple_assign_rhs_code (last_stmt);
1832 switch (rhs_code)
1834 case LROTATE_EXPR:
1835 case RROTATE_EXPR:
1836 break;
1837 default:
1838 return NULL;
1841 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1842 return NULL;
1844 lhs = gimple_assign_lhs (last_stmt);
1845 oprnd0 = gimple_assign_rhs1 (last_stmt);
1846 type = TREE_TYPE (oprnd0);
1847 oprnd1 = gimple_assign_rhs2 (last_stmt);
1848 if (TREE_CODE (oprnd0) != SSA_NAME
1849 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1850 || !INTEGRAL_TYPE_P (type)
1851 || !TYPE_UNSIGNED (type))
1852 return NULL;
1854 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1855 &def, &dt))
1856 return NULL;
1858 if (dt != vect_internal_def
1859 && dt != vect_constant_def
1860 && dt != vect_external_def)
1861 return NULL;
1863 vectype = get_vectype_for_scalar_type (type);
1864 if (vectype == NULL_TREE)
1865 return NULL;
1867 /* If vector/vector or vector/scalar rotate is supported by the target,
1868 don't do anything here. */
1869 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1870 if (optab1
1871 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1872 return NULL;
1874 if (bb_vinfo != NULL || dt != vect_internal_def)
1876 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1877 if (optab2
1878 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1879 return NULL;
1882 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1883 don't do anything here either. */
1884 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1885 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1886 if (!optab1
1887 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1888 || !optab2
1889 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1891 if (bb_vinfo == NULL && dt == vect_internal_def)
1892 return NULL;
1893 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1894 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1895 if (!optab1
1896 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1897 || !optab2
1898 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1899 return NULL;
1902 *type_in = vectype;
1903 *type_out = vectype;
1904 if (*type_in == NULL_TREE)
1905 return NULL;
1907 if (dt == vect_external_def
1908 && TREE_CODE (oprnd1) == SSA_NAME
1909 && loop_vinfo)
1911 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1912 ext_def = loop_preheader_edge (loop);
1913 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1915 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1916 if (bb == NULL
1917 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1918 ext_def = NULL;
1922 def = NULL_TREE;
1923 if (TREE_CODE (oprnd1) == INTEGER_CST
1924 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1925 def = oprnd1;
1926 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1928 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1929 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1930 && TYPE_PRECISION (TREE_TYPE (rhs1))
1931 == TYPE_PRECISION (type))
1932 def = rhs1;
1935 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1936 if (def == NULL_TREE)
1938 def = vect_recog_temp_ssa_var (type, NULL);
1939 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1940 NULL_TREE);
1941 if (ext_def)
1943 basic_block new_bb
1944 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1945 gcc_assert (!new_bb);
1947 else
1948 append_pattern_def_seq (stmt_vinfo, def_stmt);
1950 stype = TREE_TYPE (def);
1952 if (TREE_CODE (def) == INTEGER_CST)
1954 if (!tree_fits_uhwi_p (def)
1955 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1956 || integer_zerop (def))
1957 return NULL;
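      /* E.g. for a 32-bit type and a constant rotate by 5, the two
         replacement shifts use counts 5 and 32 - 5 == 27.  */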
1958 def2 = build_int_cst (stype,
1959 GET_MODE_PRECISION (TYPE_MODE (type))
1960 - tree_to_uhwi (def));
1962 else
1964 tree vecstype = get_vectype_for_scalar_type (stype);
1965 stmt_vec_info def_stmt_vinfo;
1967 if (vecstype == NULL_TREE)
1968 return NULL;
1969 def2 = vect_recog_temp_ssa_var (stype, NULL);
1970 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1971 NULL_TREE);
1972 if (ext_def)
1974 basic_block new_bb
1975 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1976 gcc_assert (!new_bb);
1978 else
1980 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1981 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1982 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1983 append_pattern_def_seq (stmt_vinfo, def_stmt);
1986 def2 = vect_recog_temp_ssa_var (stype, NULL);
1987 tree mask
1988 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1989 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1990 gimple_assign_lhs (def_stmt),
1991 mask);
1992 if (ext_def)
1994 basic_block new_bb
1995 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1996 gcc_assert (!new_bb);
1998 else
2000 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2001 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2002 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2003 append_pattern_def_seq (stmt_vinfo, def_stmt);
2007 var1 = vect_recog_temp_ssa_var (type, NULL);
2008 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2009 ? LSHIFT_EXPR : RSHIFT_EXPR,
2010 var1, oprnd0, def);
2011 append_pattern_def_seq (stmt_vinfo, def_stmt);
2013 var2 = vect_recog_temp_ssa_var (type, NULL);
2014 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2015 ? RSHIFT_EXPR : LSHIFT_EXPR,
2016 var2, oprnd0, def2);
2017 append_pattern_def_seq (stmt_vinfo, def_stmt);
2019 /* Pattern detected. */
2020 if (dump_enabled_p ())
2021 dump_printf_loc (MSG_NOTE, vect_location,
2022 "vect_recog_rotate_pattern: detected:\n");
2024 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2025 var = vect_recog_temp_ssa_var (type, NULL);
2026 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
2028 if (dump_enabled_p ())
2029 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2031 stmts->safe_push (last_stmt);
2032 return pattern_stmt;
2035 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2036 vectorized:
2038 type a_t;
2039 TYPE b_T, res_T;
2041 S1 a_t = ;
2042 S2 b_T = ;
2043 S3 res_T = b_T op a_t;
2045 where type 'TYPE' is a type with a different size than 'type',
2046 and op is <<, >> or rotate.
2048 Also detect cases:
2050 type a_t;
2051 TYPE b_T, c_T, res_T;
2053 S0 c_T = ;
2054 S1 a_t = (type) c_T;
2055 S2 b_T = ;
2056 S3 res_T = b_T op a_t;
2058 Input/Output:
2060 * STMTS: Contains a stmt from which the pattern search begins,
2061 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2062 with a shift/rotate which has the same type on both operands; in the
2063 second case just b_T op c_T, in the first case with an added cast
2064 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2066 Output:
2068 * TYPE_IN: The type of the input arguments to the pattern.
2070 * TYPE_OUT: The type of the output of this pattern.
2072 * Return value: A new stmt that will be used to replace the shift/rotate
2073 S3 stmt. */
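/* For illustration, a minimal source-level instance of the second case
   above, with the hypothetical choice TYPE == int and type == char:

     int b_T, c_T, res_T;
     char a_t;
     S1 a_t = (char) c_T;
     S3 res_T = b_T << a_t;

   For any shift count that is valid for int, a_t and c_T hold the same
   value, so the pattern emits res_T = b_T << c_T and both operands of
   the vector shift get the same vector type.  In the first case (no
   such cast) a cast of a_t to TYPE is added to
   STMT_VINFO_PATTERN_DEF_SEQ instead.  */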
2075 static gimple
2076 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2077 tree *type_in, tree *type_out)
2079 gimple last_stmt = stmts->pop ();
2080 tree oprnd0, oprnd1, lhs, var;
2081 gimple pattern_stmt, def_stmt;
2082 enum tree_code rhs_code;
2083 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2084 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2085 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2086 enum vect_def_type dt;
2087 tree def;
2089 if (!is_gimple_assign (last_stmt))
2090 return NULL;
2092 rhs_code = gimple_assign_rhs_code (last_stmt);
2093 switch (rhs_code)
2095 case LSHIFT_EXPR:
2096 case RSHIFT_EXPR:
2097 case LROTATE_EXPR:
2098 case RROTATE_EXPR:
2099 break;
2100 default:
2101 return NULL;
2104 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2105 return NULL;
2107 lhs = gimple_assign_lhs (last_stmt);
2108 oprnd0 = gimple_assign_rhs1 (last_stmt);
2109 oprnd1 = gimple_assign_rhs2 (last_stmt);
2110 if (TREE_CODE (oprnd0) != SSA_NAME
2111 || TREE_CODE (oprnd1) != SSA_NAME
2112 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2113 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2114 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2115 || TYPE_PRECISION (TREE_TYPE (lhs))
2116 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2117 return NULL;
2119 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2120 &def, &dt))
2121 return NULL;
2123 if (dt != vect_internal_def)
2124 return NULL;
2126 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2127 *type_out = *type_in;
2128 if (*type_in == NULL_TREE)
2129 return NULL;
2131 def = NULL_TREE;
2132 if (gimple_assign_cast_p (def_stmt))
2134 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2135 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2136 && TYPE_PRECISION (TREE_TYPE (rhs1))
2137 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2138 def = rhs1;
2141 if (def == NULL_TREE)
2143 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2144 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
2145 NULL_TREE);
2146 new_pattern_def_seq (stmt_vinfo, def_stmt);
2149 /* Pattern detected. */
2150 if (dump_enabled_p ())
2151 dump_printf_loc (MSG_NOTE, vect_location,
2152 "vect_recog_vector_vector_shift_pattern: detected:\n");
2154 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2155 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2156 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
2158 if (dump_enabled_p ())
2159 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2161 stmts->safe_push (last_stmt);
2162 return pattern_stmt;
2165 /* Detect a signed division by a constant that wouldn't otherwise
2166 be vectorized:
2168 type a_t, b_t;
2170 S1 a_t = b_t / N;
2172 where type 'type' is an integral type and N is a constant.
2174 Similarly handle modulo by a constant:
2176 S4 a_t = b_t % N;
2178 Input/Output:
2180 * STMTS: Contains a stmt from which the pattern search begins,
2181 i.e. the division stmt.  S1 is replaced by the following if N is
2182 a power of two constant and type is signed:
2183 S3 y_t = b_t < 0 ? N - 1 : 0;
2184 S2 x_t = b_t + y_t;
2185 S1' a_t = x_t >> log2 (N);
2187 S4 is replaced by the following if N is a power of two constant
2188 and type is signed (where *_T temporaries have unsigned type):
2189 S9 y_T = b_t < 0 ? -1U : 0U;
2190 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2191 S7 z_t = (type) z_T;
2192 S6 w_t = b_t + z_t;
2193 S5 x_t = w_t & (N - 1);
2194 S4' a_t = x_t - z_t;
2196 Output:
2198 * TYPE_IN: The type of the input arguments to the pattern.
2200 * TYPE_OUT: The type of the output of this pattern.
2202 * Return value: A new stmt that will be used to replace the division
2203 S1 or modulo S4 stmt. */
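/* For illustration, a worked instance of the power-of-two case above
   with a signed 32-bit type and N == 8 (log2 (N) == 3):

     S3  y_t = b_t < 0 ? 7 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 3;

   For b_t == -9: y_t == 7, x_t == -2 and -2 >> 3 == -1, matching C's
   truncation toward zero (-9 / 8 == -1), whereas the plain arithmetic
   shift -9 >> 3 == -2 would round toward minus infinity.  */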
2205 static gimple
2206 vect_recog_divmod_pattern (vec<gimple> *stmts,
2207 tree *type_in, tree *type_out)
2209 gimple last_stmt = stmts->pop ();
2210 tree oprnd0, oprnd1, vectype, itype, cond;
2211 gimple pattern_stmt, def_stmt;
2212 enum tree_code rhs_code;
2213 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2214 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2215 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2216 optab optab;
2217 tree q;
2218 int dummy_int, prec;
2219 stmt_vec_info def_stmt_vinfo;
2221 if (!is_gimple_assign (last_stmt))
2222 return NULL;
2224 rhs_code = gimple_assign_rhs_code (last_stmt);
2225 switch (rhs_code)
2227 case TRUNC_DIV_EXPR:
2228 case TRUNC_MOD_EXPR:
2229 break;
2230 default:
2231 return NULL;
2234 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2235 return NULL;
2237 oprnd0 = gimple_assign_rhs1 (last_stmt);
2238 oprnd1 = gimple_assign_rhs2 (last_stmt);
2239 itype = TREE_TYPE (oprnd0);
2240 if (TREE_CODE (oprnd0) != SSA_NAME
2241 || TREE_CODE (oprnd1) != INTEGER_CST
2242 || TREE_CODE (itype) != INTEGER_TYPE
2243 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2244 return NULL;
2246 vectype = get_vectype_for_scalar_type (itype);
2247 if (vectype == NULL_TREE)
2248 return NULL;
2250 /* If the target can handle vectorized division or modulo natively,
2251 don't attempt to optimize this. */
2252 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2253 if (optab != unknown_optab)
2255 enum machine_mode vec_mode = TYPE_MODE (vectype);
2256 int icode = (int) optab_handler (optab, vec_mode);
2257 if (icode != CODE_FOR_nothing)
2258 return NULL;
2261 prec = TYPE_PRECISION (itype);
2262 if (integer_pow2p (oprnd1))
2264 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2265 return NULL;
2267 /* Pattern detected. */
2268 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_NOTE, vect_location,
2270 "vect_recog_divmod_pattern: detected:\n");
2272 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2273 build_int_cst (itype, 0));
2274 if (rhs_code == TRUNC_DIV_EXPR)
2276 tree var = vect_recog_temp_ssa_var (itype, NULL);
2277 tree shift;
2278 def_stmt
2279 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2280 fold_build2 (MINUS_EXPR, itype,
2281 oprnd1,
2282 build_int_cst (itype,
2283 1)),
2284 build_int_cst (itype, 0));
2285 new_pattern_def_seq (stmt_vinfo, def_stmt);
2286 var = vect_recog_temp_ssa_var (itype, NULL);
2287 def_stmt
2288 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2289 gimple_assign_lhs (def_stmt));
2290 append_pattern_def_seq (stmt_vinfo, def_stmt);
2292 shift = build_int_cst (itype, tree_log2 (oprnd1));
2293 pattern_stmt
2294 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2295 vect_recog_temp_ssa_var (itype,
2296 NULL),
2297 var, shift);
2299 else
2301 tree signmask;
2302 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2303 if (compare_tree_int (oprnd1, 2) == 0)
2305 signmask = vect_recog_temp_ssa_var (itype, NULL);
2306 def_stmt
2307 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2308 build_int_cst (itype, 1),
2309 build_int_cst (itype, 0));
2310 append_pattern_def_seq (stmt_vinfo, def_stmt);
2312 else
2314 tree utype
2315 = build_nonstandard_integer_type (prec, 1);
2316 tree vecutype = get_vectype_for_scalar_type (utype);
2317 tree shift
2318 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2319 - tree_log2 (oprnd1));
2320 tree var = vect_recog_temp_ssa_var (utype, NULL);
2322 def_stmt
2323 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2324 build_int_cst (utype, -1),
2325 build_int_cst (utype, 0));
2326 def_stmt_vinfo
2327 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2328 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2329 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2330 append_pattern_def_seq (stmt_vinfo, def_stmt);
2331 var = vect_recog_temp_ssa_var (utype, NULL);
2332 def_stmt
2333 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2334 gimple_assign_lhs (def_stmt),
2335 shift);
2336 def_stmt_vinfo
2337 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2338 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2339 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2340 append_pattern_def_seq (stmt_vinfo, def_stmt);
2341 signmask = vect_recog_temp_ssa_var (itype, NULL);
2342 def_stmt
2343 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2344 NULL_TREE);
2345 append_pattern_def_seq (stmt_vinfo, def_stmt);
2347 def_stmt
2348 = gimple_build_assign_with_ops (PLUS_EXPR,
2349 vect_recog_temp_ssa_var (itype,
2350 NULL),
2351 oprnd0, signmask);
2352 append_pattern_def_seq (stmt_vinfo, def_stmt);
2353 def_stmt
2354 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2355 vect_recog_temp_ssa_var (itype,
2356 NULL),
2357 gimple_assign_lhs (def_stmt),
2358 fold_build2 (MINUS_EXPR, itype,
2359 oprnd1,
2360 build_int_cst (itype,
2361 1)));
2362 append_pattern_def_seq (stmt_vinfo, def_stmt);
2364 pattern_stmt
2365 = gimple_build_assign_with_ops (MINUS_EXPR,
2366 vect_recog_temp_ssa_var (itype,
2367 NULL),
2368 gimple_assign_lhs (def_stmt),
2369 signmask);
2372 if (dump_enabled_p ())
2373 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2374 0);
2376 stmts->safe_push (last_stmt);
2378 *type_in = vectype;
2379 *type_out = vectype;
2380 return pattern_stmt;
2383 if (prec > HOST_BITS_PER_WIDE_INT
2384 || integer_zerop (oprnd1))
2385 return NULL;
2387 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2388 return NULL;
2390 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2392 if (TYPE_UNSIGNED (itype))
2394 unsigned HOST_WIDE_INT mh, ml;
2395 int pre_shift, post_shift;
2396 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2397 & GET_MODE_MASK (TYPE_MODE (itype)));
2398 tree t1, t2, t3, t4;
2400 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2401 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2402 return NULL;
2404 /* Find a suitable multiplier and right shift count
2405 instead of dividing by D. */
2406 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2408 /* If the suggested multiplier is more than PREC bits, we can do better
2409 for even divisors, using an initial right shift. */
2410 if (mh != 0 && (d & 1) == 0)
2412 pre_shift = floor_log2 (d & -d);
2413 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2414 &ml, &post_shift, &dummy_int);
2415 gcc_assert (!mh);
2417 else
2418 pre_shift = 0;
2420 if (mh != 0)
2422 if (post_shift - 1 >= prec)
2423 return NULL;
2425 /* t1 = oprnd0 h* ml;
2426 t2 = oprnd0 - t1;
2427 t3 = t2 >> 1;
2428 t4 = t1 + t3;
2429 q = t4 >> (post_shift - 1); */
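          /* For instance, with prec == 32 and d == 7, choose_multiplier
             picks ml == 0x24924925 with post_shift == 3 (values shown
             for illustration).  For oprnd0 == 14: t1 == highpart of
             14 * 0x24924925 == 2, t2 == 12, t3 == 6, t4 == 8 and
             q == 8 >> 2 == 2 == 14 / 7.  */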
2430 t1 = vect_recog_temp_ssa_var (itype, NULL);
2431 def_stmt
2432 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2433 build_int_cst (itype, ml));
2434 append_pattern_def_seq (stmt_vinfo, def_stmt);
2436 t2 = vect_recog_temp_ssa_var (itype, NULL);
2437 def_stmt
2438 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2439 append_pattern_def_seq (stmt_vinfo, def_stmt);
2441 t3 = vect_recog_temp_ssa_var (itype, NULL);
2442 def_stmt
2443 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2444 integer_one_node);
2445 append_pattern_def_seq (stmt_vinfo, def_stmt);
2447 t4 = vect_recog_temp_ssa_var (itype, NULL);
2448 def_stmt
2449 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2451 if (post_shift != 1)
2453 append_pattern_def_seq (stmt_vinfo, def_stmt);
2455 q = vect_recog_temp_ssa_var (itype, NULL);
2456 pattern_stmt
2457 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2458 build_int_cst (itype,
2459 post_shift
2460 - 1));
2462 else
2464 q = t4;
2465 pattern_stmt = def_stmt;
2468 else
2470 if (pre_shift >= prec || post_shift >= prec)
2471 return NULL;
2473 /* t1 = oprnd0 >> pre_shift;
2474 t2 = t1 h* ml;
2475 q = t2 >> post_shift; */
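          /* For instance, with prec == 32 and d == 14, pre_shift == 1
             and the remaining division by 7 gets ml == 0x92492493 with
             post_shift == 2 (values shown for illustration).  For
             oprnd0 == 42: t1 == 21, t2 == highpart of
             21 * 0x92492493 == 12 and q == 12 >> 2 == 3 == 42 / 14.  */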
2476 if (pre_shift)
2478 t1 = vect_recog_temp_ssa_var (itype, NULL);
2479 def_stmt
2480 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2481 build_int_cst (NULL,
2482 pre_shift));
2483 append_pattern_def_seq (stmt_vinfo, def_stmt);
2485 else
2486 t1 = oprnd0;
2488 t2 = vect_recog_temp_ssa_var (itype, NULL);
2489 def_stmt
2490 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2491 build_int_cst (itype, ml));
2493 if (post_shift)
2495 append_pattern_def_seq (stmt_vinfo, def_stmt);
2497 q = vect_recog_temp_ssa_var (itype, NULL);
2498 def_stmt
2499 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2500 build_int_cst (itype,
2501 post_shift));
2503 else
2504 q = t2;
2506 pattern_stmt = def_stmt;
2509 else
2511 unsigned HOST_WIDE_INT ml;
2512 int post_shift;
2513 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2514 unsigned HOST_WIDE_INT abs_d;
2515 bool add = false;
2516 tree t1, t2, t3, t4;
2518 /* Give up for -1. */
2519 if (d == -1)
2520 return NULL;
2522 /* Since d might be INT_MIN, we have to cast to
2523 unsigned HOST_WIDE_INT before negating to avoid
2524 undefined signed overflow. */
2525 abs_d = (d >= 0
2526 ? (unsigned HOST_WIDE_INT) d
2527 : - (unsigned HOST_WIDE_INT) d);
2529 /* n rem d = n rem -d */
2530 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2532 d = abs_d;
2533 oprnd1 = build_int_cst (itype, abs_d);
2535 else if (HOST_BITS_PER_WIDE_INT >= prec
2536 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2537 /* This case is not handled correctly below. */
2538 return NULL;
2540 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2541 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2543 add = true;
2544 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2546 if (post_shift >= prec)
2547 return NULL;
2549 /* t1 = oprnd0 h* ml; */
2550 t1 = vect_recog_temp_ssa_var (itype, NULL);
2551 def_stmt
2552 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2553 build_int_cst (itype, ml));
2555 if (add)
2557 /* t2 = t1 + oprnd0; */
2558 append_pattern_def_seq (stmt_vinfo, def_stmt);
2559 t2 = vect_recog_temp_ssa_var (itype, NULL);
2560 def_stmt
2561 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2563 else
2564 t2 = t1;
2566 if (post_shift)
2568 /* t3 = t2 >> post_shift; */
2569 append_pattern_def_seq (stmt_vinfo, def_stmt);
2570 t3 = vect_recog_temp_ssa_var (itype, NULL);
2571 def_stmt
2572 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2573 build_int_cst (itype, post_shift));
2575 else
2576 t3 = t2;
2578 wide_int oprnd0_min, oprnd0_max;
2579 int msb = 1;
2580 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2582 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2583 msb = 0;
2584 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2585 msb = -1;
2588 if (msb == 0 && d >= 0)
2590 /* q = t3; */
2591 q = t3;
2592 pattern_stmt = def_stmt;
2594 else
2596 /* t4 = oprnd0 >> (prec - 1);
2597 or if we know from VRP that oprnd0 >= 0
2598 t4 = 0;
2599 or if we know from VRP that oprnd0 < 0
2600 t4 = -1; */
2601 append_pattern_def_seq (stmt_vinfo, def_stmt);
2602 t4 = vect_recog_temp_ssa_var (itype, NULL);
2603 if (msb != 1)
2604 def_stmt
2605 = gimple_build_assign_with_ops (INTEGER_CST,
2606 t4, build_int_cst (itype, msb),
2607 NULL_TREE);
2608 else
2609 def_stmt
2610 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2611 build_int_cst (itype, prec - 1));
2612 append_pattern_def_seq (stmt_vinfo, def_stmt);
2614 /* q = t3 - t4; or q = t4 - t3; */
2615 q = vect_recog_temp_ssa_var (itype, NULL);
2616 pattern_stmt
2617 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2618 d < 0 ? t3 : t4);
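      /* For instance, with prec == 32 and d == 3, choose_multiplier
         picks ml == 0x55555556 with post_shift == 0, and ADD stays
         false (values shown for illustration).  For oprnd0 == -9:
         t1 == t3 == highpart of -9 * 0x55555556 == -4,
         t4 == -9 >> 31 == -1 and q == t3 - t4 == -3 == -9 / 3.  */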
2622 if (rhs_code == TRUNC_MOD_EXPR)
2624 tree r, t1;
2626 /* We divided. Now finish by:
2627 t1 = q * oprnd1;
2628 r = oprnd0 - t1; */
2629 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2631 t1 = vect_recog_temp_ssa_var (itype, NULL);
2632 def_stmt
2633 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2634 append_pattern_def_seq (stmt_vinfo, def_stmt);
2636 r = vect_recog_temp_ssa_var (itype, NULL);
2637 pattern_stmt
2638 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2641 /* Pattern detected. */
2642 if (dump_enabled_p ())
2644 dump_printf_loc (MSG_NOTE, vect_location,
2645 "vect_recog_divmod_pattern: detected: ");
2646 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2647 dump_printf (MSG_NOTE, "\n");
2650 stmts->safe_push (last_stmt);
2652 *type_in = vectype;
2653 *type_out = vectype;
2654 return pattern_stmt;
2657 /* Function vect_recog_mixed_size_cond_pattern
2659 Try to find the following pattern:
2661 type x_t, y_t;
2662 TYPE a_T, b_T, c_T;
2663 loop:
2664 S1 a_T = x_t CMP y_t ? b_T : c_T;
2666 where type 'TYPE' is an integral type which has a different size
2667 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2668 than 'type', the constants need to fit into an integer type
2669 with the same width as 'type') or results of conversion from 'type'.
2671 Input:
2673 * LAST_STMT: A stmt from which the pattern search begins.
2675 Output:
2677 * TYPE_IN: The type of the input arguments to the pattern.
2679 * TYPE_OUT: The type of the output of this pattern.
2681 * Return value: A new stmt that will be used to replace the pattern.
2682 Additionally a def_stmt is added.
2684 a_it = x_t CMP y_t ? b_it : c_it;
2685 a_T = (TYPE) a_it; */
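/* For illustration, a minimal source-level instance with the
   hypothetical choice TYPE == int and type == short:

     short x_t, y_t;
     int a_T = x_t < y_t ? 17 : -1;

   becomes

     a_it = x_t < y_t ? 17 : -1;
     a_T = (int) a_it;

   where a_it has a 16-bit type, so the comparison and the selection can
   use the same vector shape (e.g. eight 16-bit lanes of a 128-bit
   vector) and the widening is done as a separate step.  Both constants
   fit a 16-bit type, as required above.  */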
2687 static gimple
2688 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2689 tree *type_out)
2691 gimple last_stmt = (*stmts)[0];
2692 tree cond_expr, then_clause, else_clause;
2693 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2694 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2695 enum machine_mode cmpmode;
2696 gimple pattern_stmt, def_stmt;
2697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2698 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2699 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2700 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2701 bool promotion;
2702 tree comp_scalar_type;
2704 if (!is_gimple_assign (last_stmt)
2705 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2706 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2707 return NULL;
2709 cond_expr = gimple_assign_rhs1 (last_stmt);
2710 then_clause = gimple_assign_rhs2 (last_stmt);
2711 else_clause = gimple_assign_rhs3 (last_stmt);
2713 if (!COMPARISON_CLASS_P (cond_expr))
2714 return NULL;
2716 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2717 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2718 if (comp_vectype == NULL_TREE)
2719 return NULL;
2721 type = gimple_expr_type (last_stmt);
2722 if (types_compatible_p (type, comp_scalar_type)
2723 || ((TREE_CODE (then_clause) != INTEGER_CST
2724 || TREE_CODE (else_clause) != INTEGER_CST)
2725 && !INTEGRAL_TYPE_P (comp_scalar_type))
2726 || !INTEGRAL_TYPE_P (type))
2727 return NULL;
2729 if ((TREE_CODE (then_clause) != INTEGER_CST
2730 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2731 &def_stmt0, &promotion))
2732 || (TREE_CODE (else_clause) != INTEGER_CST
2733 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2734 &def_stmt1, &promotion)))
2735 return NULL;
2737 if (orig_type0 && orig_type1
2738 && !types_compatible_p (orig_type0, orig_type1))
2739 return NULL;
2741 if (orig_type0)
2743 if (!types_compatible_p (orig_type0, comp_scalar_type))
2744 return NULL;
2745 then_clause = gimple_assign_rhs1 (def_stmt0);
2746 itype = orig_type0;
2749 if (orig_type1)
2751 if (!types_compatible_p (orig_type1, comp_scalar_type))
2752 return NULL;
2753 else_clause = gimple_assign_rhs1 (def_stmt1);
2754 itype = orig_type1;
2757 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2759 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2760 return NULL;
2762 vectype = get_vectype_for_scalar_type (type);
2763 if (vectype == NULL_TREE)
2764 return NULL;
2766 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2767 return NULL;
2769 if (itype == NULL_TREE)
2770 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2771 TYPE_UNSIGNED (type));
2773 if (itype == NULL_TREE
2774 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2775 return NULL;
2777 vecitype = get_vectype_for_scalar_type (itype);
2778 if (vecitype == NULL_TREE)
2779 return NULL;
2781 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2782 return NULL;
2784 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2786 if ((TREE_CODE (then_clause) == INTEGER_CST
2787 && !int_fits_type_p (then_clause, itype))
2788 || (TREE_CODE (else_clause) == INTEGER_CST
2789 && !int_fits_type_p (else_clause, itype)))
2790 return NULL;
2793 def_stmt
2794 = gimple_build_assign_with_ops (COND_EXPR,
2795 vect_recog_temp_ssa_var (itype, NULL),
2796 unshare_expr (cond_expr),
2797 fold_convert (itype, then_clause),
2798 fold_convert (itype, else_clause));
2799 pattern_stmt
2800 = gimple_build_assign_with_ops (NOP_EXPR,
2801 vect_recog_temp_ssa_var (type, NULL),
2802 gimple_assign_lhs (def_stmt), NULL_TREE);
2804 new_pattern_def_seq (stmt_vinfo, def_stmt);
2805 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2806 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2807 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2808 *type_in = vecitype;
2809 *type_out = vectype;
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_NOTE, vect_location,
2813 "vect_recog_mixed_size_cond_pattern: detected:\n");
2815 return pattern_stmt;
2819 /* Helper function of vect_recog_bool_pattern.  Called recursively;
2820 returns true if bool VAR can be optimized that way. */
2822 static bool
2823 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2825 gimple def_stmt;
2826 enum vect_def_type dt;
2827 tree def, rhs1;
2828 enum tree_code rhs_code;
2830 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2831 &dt))
2832 return false;
2834 if (dt != vect_internal_def)
2835 return false;
2837 if (!is_gimple_assign (def_stmt))
2838 return false;
2840 if (!has_single_use (def))
2841 return false;
2843 rhs1 = gimple_assign_rhs1 (def_stmt);
2844 rhs_code = gimple_assign_rhs_code (def_stmt);
2845 switch (rhs_code)
2847 case SSA_NAME:
2848 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2850 CASE_CONVERT:
2851 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2852 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2853 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2854 return false;
2855 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2857 case BIT_NOT_EXPR:
2858 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2860 case BIT_AND_EXPR:
2861 case BIT_IOR_EXPR:
2862 case BIT_XOR_EXPR:
2863 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2864 return false;
2865 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2866 bb_vinfo);
2868 default:
2869 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2871 tree vecitype, comp_vectype;
2873 /* If the comparison can throw, then is_gimple_condexpr will be
2874 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2875 if (stmt_could_throw_p (def_stmt))
2876 return false;
2878 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2879 if (comp_vectype == NULL_TREE)
2880 return false;
2882 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2884 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2885 tree itype
2886 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
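          /* E.g. for a comparison of two doubles, ITYPE here is a
             64-bit unsigned integer type.  */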
2887 vecitype = get_vectype_for_scalar_type (itype);
2888 if (vecitype == NULL_TREE)
2889 return false;
2891 else
2892 vecitype = comp_vectype;
2893 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2895 return false;
2900 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2901 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2902 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2904 static tree
2905 adjust_bool_pattern_cast (tree type, tree var)
2907 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2908 gimple cast_stmt, pattern_stmt;
2910 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2911 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2912 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2913 cast_stmt
2914 = gimple_build_assign_with_ops (NOP_EXPR,
2915 vect_recog_temp_ssa_var (type, NULL),
2916 gimple_assign_lhs (pattern_stmt),
2917 NULL_TREE);
2918 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2919 return gimple_assign_lhs (cast_stmt);
2923 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2924 recursively. VAR is an SSA_NAME that should be transformed from bool
2925 to a wider integer type.  OUT_TYPE is the desired final integer type of
2926 the whole pattern.  TRUEVAL should be NULL unless optimizing
2927 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2928 in the then_clause.  STMTS is where statements with added pattern stmts
2929 should be pushed to. */
2931 static tree
2932 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2933 vec<gimple> *stmts)
2935 gimple stmt = SSA_NAME_DEF_STMT (var);
2936 enum tree_code rhs_code, def_rhs_code;
2937 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2938 location_t loc;
2939 gimple pattern_stmt, def_stmt;
2941 rhs1 = gimple_assign_rhs1 (stmt);
2942 rhs2 = gimple_assign_rhs2 (stmt);
2943 rhs_code = gimple_assign_rhs_code (stmt);
2944 loc = gimple_location (stmt);
2945 switch (rhs_code)
2947 case SSA_NAME:
2948 CASE_CONVERT:
2949 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2950 itype = TREE_TYPE (irhs1);
2951 pattern_stmt
2952 = gimple_build_assign_with_ops (SSA_NAME,
2953 vect_recog_temp_ssa_var (itype, NULL),
2954 irhs1, NULL_TREE);
2955 break;
2957 case BIT_NOT_EXPR:
2958 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2959 itype = TREE_TYPE (irhs1);
2960 pattern_stmt
2961 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2962 vect_recog_temp_ssa_var (itype, NULL),
2963 irhs1, build_int_cst (itype, 1));
2964 break;
2966 case BIT_AND_EXPR:
2967 /* Try to optimize x = y & (a < b ? 1 : 0); into
2968 x = (a < b ? y : 0);
2970 E.g. for:
2971 bool a_b, b_b, c_b;
2972 TYPE d_T;
2974 S1 a_b = x1 CMP1 y1;
2975 S2 b_b = x2 CMP2 y2;
2976 S3 c_b = a_b & b_b;
2977 S4 d_T = (TYPE) c_b;
2979 we would normally emit:
2981 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2982 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2983 S3' c_T = a_T & b_T;
2984 S4' d_T = c_T;
2986 but we can save one stmt by using the
2987 result of one of the COND_EXPRs in the other COND_EXPR and leave
2988 the BIT_AND_EXPR stmt out:
2990 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2991 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2992 S4' d_T = c_T;
2994 At least when VEC_COND_EXPR is implemented using masks,
2995 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2996 computes the comparison mask and ands it, in one case with an
2997 all-ones vector, in the other case with a vector register.
2998 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2999 often more expensive. */
3000 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3001 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3002 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3004 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3005 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3006 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3007 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3009 gimple tstmt;
3010 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3011 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3012 tstmt = stmts->pop ();
3013 gcc_assert (tstmt == def_stmt);
3014 stmts->quick_push (stmt);
3015 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3016 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3017 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3018 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3019 return irhs2;
3021 else
3022 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3023 goto and_ior_xor;
3025 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3026 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3027 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3029 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3030 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3031 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3032 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3034 gimple tstmt;
3035 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3036 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3037 tstmt = stmts->pop ();
3038 gcc_assert (tstmt == def_stmt);
3039 stmts->quick_push (stmt);
3040 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3041 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3042 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3043 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3044 return irhs1;
3046 else
3047 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3048 goto and_ior_xor;
3050 /* FALLTHRU */
3051 case BIT_IOR_EXPR:
3052 case BIT_XOR_EXPR:
3053 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3054 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3055 and_ior_xor:
3056 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3057 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3059 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3060 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3061 int out_prec = TYPE_PRECISION (out_type);
3062 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3063 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3064 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3065 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3066 else
3068 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3069 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3072 itype = TREE_TYPE (irhs1);
3073 pattern_stmt
3074 = gimple_build_assign_with_ops (rhs_code,
3075 vect_recog_temp_ssa_var (itype, NULL),
3076 irhs1, irhs2);
3077 break;
3079 default:
3080 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3081 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3082 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3083 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3084 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3086 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3087 itype
3088 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3090 else
3091 itype = TREE_TYPE (rhs1);
3092 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3093 if (trueval == NULL_TREE)
3094 trueval = build_int_cst (itype, 1);
3095 else
3096 gcc_checking_assert (useless_type_conversion_p (itype,
3097 TREE_TYPE (trueval)));
3098 pattern_stmt
3099 = gimple_build_assign_with_ops (COND_EXPR,
3100 vect_recog_temp_ssa_var (itype, NULL),
3101 cond_expr, trueval,
3102 build_int_cst (itype, 0));
3103 break;
3106 stmts->safe_push (stmt);
3107 gimple_set_location (pattern_stmt, loc);
3108 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3109 return gimple_assign_lhs (pattern_stmt);
3113 /* Function vect_recog_bool_pattern
3115 Try to find pattern like following:
3117 bool a_b, b_b, c_b, d_b, e_b;
3118 TYPE f_T;
3119 loop:
3120 S1 a_b = x1 CMP1 y1;
3121 S2 b_b = x2 CMP2 y2;
3122 S3 c_b = a_b & b_b;
3123 S4 d_b = x3 CMP3 y3;
3124 S5 e_b = c_b | d_b;
3125 S6 f_T = (TYPE) e_b;
3127 where type 'TYPE' is an integral type. Or a similar pattern
3128 ending in
3130 S6 f_Y = e_b ? r_Y : s_Y;
3132 as results from if-conversion of a complex condition.
3134 Input:
3136 * LAST_STMT: A stmt at the end of the pattern, from which the
3137 search begins, i.e. a cast of a bool to
3138 an integer type.
3140 Output:
3142 * TYPE_IN: The type of the input arguments to the pattern.
3144 * TYPE_OUT: The type of the output of this pattern.
3146 * Return value: A new stmt that will be used to replace the pattern.
3148 Assuming the size of TYPE is the same as the size of all comparisons
3149 (otherwise some casts would be added where needed), for the above
3150 sequence we create the following related pattern stmts:
3151 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3152 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3153 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3154 S5' e_T = c_T | d_T;
3155 S6' f_T = e_T;
3157 Instead of the above S3' we could emit:
3158 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3159 S3' c_T = a_T | b_T;
3160 but the above is more efficient. */
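/* For illustration, a minimal source-level instance of the first form,
   with the hypothetical choice TYPE == int:

     bool a_b = x1 < y1, b_b = x2 < y2, d_b = x3 < y3;
     int f_T = (int) ((a_b & b_b) | d_b);

   becomes, per S1'/S3'/S4'/S5'/S6' above:

     a_T = x1 < y1 ? 1 : 0;
     c_T = x2 < y2 ? a_T : 0;
     d_T = x3 < y3 ? 1 : 0;
     e_T = c_T | d_T;
     f_T = e_T;

   so every intermediate value is an integer of TYPE's width and maps
   onto VEC_COND_EXPR plus bitwise vector ops.  */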
3162 static gimple
3163 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3164 tree *type_out)
3166 gimple last_stmt = stmts->pop ();
3167 enum tree_code rhs_code;
3168 tree var, lhs, rhs, vectype;
3169 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3171 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3172 gimple pattern_stmt;
3174 if (!is_gimple_assign (last_stmt))
3175 return NULL;
3177 var = gimple_assign_rhs1 (last_stmt);
3178 lhs = gimple_assign_lhs (last_stmt);
3180 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3181 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3182 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3183 return NULL;
3185 rhs_code = gimple_assign_rhs_code (last_stmt);
3186 if (CONVERT_EXPR_CODE_P (rhs_code))
3188 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3189 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3190 return NULL;
3191 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3192 if (vectype == NULL_TREE)
3193 return NULL;
3195 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3196 return NULL;
3198 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3199 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3200 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3201 pattern_stmt
3202 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3203 else
3204 pattern_stmt
3205 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
3206 *type_out = vectype;
3207 *type_in = vectype;
3208 stmts->safe_push (last_stmt);
3209 if (dump_enabled_p ())
3210 dump_printf_loc (MSG_NOTE, vect_location,
3211 "vect_recog_bool_pattern: detected:\n");
3213 return pattern_stmt;
3215 else if (rhs_code == COND_EXPR
3216 && TREE_CODE (var) == SSA_NAME)
3218 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3219 if (vectype == NULL_TREE)
3220 return NULL;
3222 /* Build a scalar type for the boolean result that when
3223 vectorized matches the vector type of the result in
3224 size and number of elements. */
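      /* E.g. a 128-bit vector type with four elements gives
         prec == 128 / 4 == 32, i.e. a 32-bit scalar type.  */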
3225 unsigned prec
3226 = wi::udiv_trunc (TYPE_SIZE (vectype),
3227 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3228 tree type
3229 = build_nonstandard_integer_type (prec,
3230 TYPE_UNSIGNED (TREE_TYPE (var)));
3231 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3232 return NULL;
3234 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3235 return NULL;
3237 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3238 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3239 pattern_stmt
3240 = gimple_build_assign_with_ops (COND_EXPR, lhs,
3241 build2 (NE_EXPR, boolean_type_node,
3242 rhs, build_int_cst (type, 0)),
3243 gimple_assign_rhs2 (last_stmt),
3244 gimple_assign_rhs3 (last_stmt));
3245 *type_out = vectype;
3246 *type_in = vectype;
3247 stmts->safe_push (last_stmt);
3248 if (dump_enabled_p ())
3249 dump_printf_loc (MSG_NOTE, vect_location,
3250 "vect_recog_bool_pattern: detected:\n");
3252 return pattern_stmt;
3254 else if (rhs_code == SSA_NAME
3255 && STMT_VINFO_DATA_REF (stmt_vinfo))
3257 stmt_vec_info pattern_stmt_info;
3258 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3259 gcc_assert (vectype != NULL_TREE);
3260 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3261 return NULL;
3262 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3263 return NULL;
3265 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3266 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3267 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3269 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3270 gimple cast_stmt
3271 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
3272 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3273 rhs = rhs2;
3275 pattern_stmt
3276 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3277 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3278 bb_vinfo);
3279 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3280 STMT_VINFO_DATA_REF (pattern_stmt_info)
3281 = STMT_VINFO_DATA_REF (stmt_vinfo);
3282 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3283 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3284 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3285 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3286 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3287 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3288 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3289 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3290 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3291 *type_out = vectype;
3292 *type_in = vectype;
3293 stmts->safe_push (last_stmt);
3294 if (dump_enabled_p ())
3295 dump_printf_loc (MSG_NOTE, vect_location,
3296 "vect_recog_bool_pattern: detected:\n");
3297 return pattern_stmt;
3299 else
3300 return NULL;
3304 /* Mark statements that are involved in a pattern. */
3306 static inline void
3307 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3308 tree pattern_vectype)
3310 stmt_vec_info pattern_stmt_info, def_stmt_info;
3311 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3312 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3313 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3314 gimple def_stmt;
3316 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3317 if (pattern_stmt_info == NULL)
3319 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3320 bb_vinfo);
3321 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3323 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3325 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3326 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3327 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3328 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3329 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3330 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3331 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3332 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3333 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3335 gimple_stmt_iterator si;
3336 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3337 !gsi_end_p (si); gsi_next (&si))
3339 def_stmt = gsi_stmt (si);
3340 def_stmt_info = vinfo_for_stmt (def_stmt);
3341 if (def_stmt_info == NULL)
3343 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3344 bb_vinfo);
3345 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3347 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3348 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3349 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3350 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3351 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3356 /* Function vect_pattern_recog_1
3358 Input:
3359 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3360 computation pattern.
3361 STMT: A stmt from which the pattern search should start.
3363 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3364 expression that computes the same functionality and can be used to
3365 replace the sequence of stmts that are involved in the pattern.
3367 Output:
3368 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3369 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3370 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3371 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3372 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3373 the available target pattern.
3375 This function also does some bookkeeping, as explained in the documentation
3376 for vect_pattern_recog. */
3378 static void
3379 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3380 gimple_stmt_iterator si,
3381 vec<gimple> *stmts_to_replace)
3383 gimple stmt = gsi_stmt (si), pattern_stmt;
3384 stmt_vec_info stmt_info;
3385 loop_vec_info loop_vinfo;
3386 tree pattern_vectype;
3387 tree type_in, type_out;
3388 enum tree_code code;
3389 int i;
3390 gimple next;
3392 stmts_to_replace->truncate (0);
3393 stmts_to_replace->quick_push (stmt);
3394 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3395 if (!pattern_stmt)
3396 return;
3398 stmt = stmts_to_replace->last ();
3399 stmt_info = vinfo_for_stmt (stmt);
3400 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3402 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3404 /* No need to check target support (already checked by the pattern
3405 recognition function). */
3406 pattern_vectype = type_out ? type_out : type_in;
3408 else
3410 enum machine_mode vec_mode;
3411 enum insn_code icode;
3412 optab optab;
3414 /* Check target support */
3415 type_in = get_vectype_for_scalar_type (type_in);
3416 if (!type_in)
3417 return;
3418 if (type_out)
3419 type_out = get_vectype_for_scalar_type (type_out);
3420 else
3421 type_out = type_in;
3422 if (!type_out)
3423 return;
3424 pattern_vectype = type_out;
3426 if (is_gimple_assign (pattern_stmt))
3427 code = gimple_assign_rhs_code (pattern_stmt);
3428 else
3430 gcc_assert (is_gimple_call (pattern_stmt));
3431 code = CALL_EXPR;
3434 optab = optab_for_tree_code (code, type_in, optab_default);
3435 vec_mode = TYPE_MODE (type_in);
3436 if (!optab
3437 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3438 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3439 return;
3442 /* Found a vectorizable pattern. */
3443 if (dump_enabled_p ())
3445 dump_printf_loc (MSG_NOTE, vect_location,
3446 "pattern recognized: ");
3447 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3448 dump_printf (MSG_NOTE, "\n");
3451 /* Mark the stmts that are involved in the pattern. */
3452 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3454 /* Patterns cannot be vectorized using SLP, because they change the order of
3455 computation. */
3456 if (loop_vinfo)
3457 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3458 if (next == stmt)
3459 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3461 /* It is possible that additional pattern stmts are created and inserted in
3462 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3463 relevant statements. */
3464 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3465 && (unsigned) i < (stmts_to_replace->length () - 1);
3466 i++)
3468 stmt_info = vinfo_for_stmt (stmt);
3469 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3470 if (dump_enabled_p ())
3472 dump_printf_loc (MSG_NOTE, vect_location,
3473 "additional pattern stmt: ");
3474 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3475 dump_printf (MSG_NOTE, "\n");
3478 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3483 /* Function vect_pattern_recog
3485 Input:
3486 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
3487 computation idioms.
3489 Output - for each computation idiom that is detected we create a new stmt
3490 that provides the same functionality and that can be vectorized. We
3491 also record some information in the stmt_vec_info of the relevant
3492 stmts, as explained below:
3494 At the entry to this function we have the following stmts, with the
3495 following initial value in the STMT_VINFO fields:
3497 stmt in_pattern_p related_stmt vec_stmt
3498 S1: a_i = .... - - -
3499 S2: a_2 = ..use(a_i).. - - -
3500 S3: a_1 = ..use(a_2).. - - -
3501 S4: a_0 = ..use(a_1).. - - -
3502 S5: ... = ..use(a_0).. - - -
3504 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3505 represented by a single stmt. We then:
3506 - create a new stmt S6 equivalent to the pattern (the stmt is not
3507 inserted into the code)
3508 - fill in the STMT_VINFO fields as follows:
3510 in_pattern_p related_stmt vec_stmt
3511 S1: a_i = .... - - -
3512 S2: a_2 = ..use(a_i).. - - -
3513 S3: a_1 = ..use(a_2).. - - -
3514 S4: a_0 = ..use(a_1).. true S6 -
3515 '---> S6: a_new = .... - S4 -
3516 S5: ... = ..use(a_0).. - - -
3518 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3519 to each other through the RELATED_STMT field).
3521 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3522 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3523 remain irrelevant unless used by stmts other than S4.
3525 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3526 (because they are marked as irrelevant). It will vectorize S6, and record
3527 a pointer to the new vector stmt VS6 from S6 (as usual).
3528 S4 will be skipped, and S5 will be vectorized as usual:
3530 in_pattern_p related_stmt vec_stmt
3531 S1: a_i = .... - - -
3532 S2: a_2 = ..use(a_i).. - - -
3533 S3: a_1 = ..use(a_2).. - - -
3534 > VS6: va_new = .... - - -
3535 S4: a_0 = ..use(a_1).. true S6 VS6
3536 '---> S6: a_new = .... - S4 VS6
3537 > VS5: ... = ..vuse(va_new).. - - -
3538 S5: ... = ..use(a_0).. - - -
3540 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3541 elsewhere), and we'll end up with:
3543 VS6: va_new = ....
3544 VS5: ... = ..vuse(va_new)..
3546 In case of more than one pattern statements, e.g., widen-mult with
3547 intermediate type:
3549 S1 a_t = ;
3550 S2 a_T = (TYPE) a_t;
3551 '--> S3: a_it = (interm_type) a_t;
3552 S4 prod_T = a_T * CONST;
3553 '--> S5: prod_T' = a_it w* CONST;
3555 there may be other users of a_T outside the pattern. In that case S2 will
3556 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3557 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3558 be recorded in S3. */
3560 void
3561 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3563 struct loop *loop;
3564 basic_block *bbs;
3565 unsigned int nbbs;
3566 gimple_stmt_iterator si;
3567 unsigned int i, j;
3568 vect_recog_func_ptr vect_recog_func;
3569 auto_vec<gimple, 1> stmts_to_replace;
3570 gimple stmt;
3572 if (dump_enabled_p ())
3573 dump_printf_loc (MSG_NOTE, vect_location,
3574 "=== vect_pattern_recog ===\n");
3576 if (loop_vinfo)
3578 loop = LOOP_VINFO_LOOP (loop_vinfo);
3579 bbs = LOOP_VINFO_BBS (loop_vinfo);
3580 nbbs = loop->num_nodes;
3582 else
3584 bbs = &BB_VINFO_BB (bb_vinfo);
3585 nbbs = 1;
3588 /* Scan through the loop stmts, applying the pattern recognition
3589 functions starting at each stmt visited: */
3590 for (i = 0; i < nbbs; i++)
3592 basic_block bb = bbs[i];
3593 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3595 if (bb_vinfo && (stmt = gsi_stmt (si))
3596 && vinfo_for_stmt (stmt)
3597 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3598 continue;
3600 /* Scan over all generic vect_recog_xxx_pattern functions. */
3601 for (j = 0; j < NUM_PATTERNS; j++)
3603 vect_recog_func = vect_vect_recog_func_ptrs[j];
3604 vect_pattern_recog_1 (vect_recog_func, si,
3605 &stmts_to_replace);