2013-05-23 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-vect-patterns.c
blob648385a9b0cabe9a1c7d53db8a2a5a51c2536f2e
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-flow.h"
31 #include "cfgloop.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "params.h"
35 #include "tree-data-ref.h"
36 #include "tree-vectorizer.h"
37 #include "recog.h" /* FIXME: for insn_data */
38 #include "diagnostic-core.h"
39 #include "dumpfile.h"
41 /* Pattern recognition functions */
42 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
43 tree *);
44 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
45 tree *);
46 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
47 tree *);
48 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
49 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
50 tree *);
51 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
52 tree *, tree *);
53 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
54 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
55 tree *, tree *);
56 static gimple vect_recog_divmod_pattern (vec<gimple> *,
57 tree *, tree *);
58 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
59 tree *, tree *);
60 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
61 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
62 vect_recog_widen_mult_pattern,
63 vect_recog_widen_sum_pattern,
64 vect_recog_dot_prod_pattern,
65 vect_recog_pow_pattern,
66 vect_recog_widen_shift_pattern,
67 vect_recog_over_widening_pattern,
68 vect_recog_rotate_pattern,
69 vect_recog_vector_vector_shift_pattern,
70 vect_recog_divmod_pattern,
71 vect_recog_mixed_size_cond_pattern,
72 vect_recog_bool_pattern};
74 static inline void
75 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
77 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
78 stmt);
81 static inline void
82 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
84 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
85 append_pattern_def_seq (stmt_info, stmt);
88 /* Check whether STMT2 is in the same loop or basic block as STMT1.
89 Which of the two applies depends on whether we're currently doing
90 loop-based or basic-block-based vectorization, as determined by
91 the vinfo_for_stmt for STMT1 (which must be defined).
93 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
94 to be defined as well. */
96 static bool
97 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
99 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
100 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
103 if (!gimple_bb (stmt2))
104 return false;
106 if (loop_vinfo)
108 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
109 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
110 return false;
112 else
114 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
115 || gimple_code (stmt2) == GIMPLE_PHI)
116 return false;
119 gcc_assert (vinfo_for_stmt (stmt2));
120 return true;
123 /* If the LHS of DEF_STMT has a single use, and that statement is
124 in the same loop or basic block, return it. */
126 static gimple
127 vect_single_imm_use (gimple def_stmt)
129 tree lhs = gimple_assign_lhs (def_stmt);
130 use_operand_p use_p;
131 gimple use_stmt;
133 if (!single_imm_use (lhs, &use_p, &use_stmt))
134 return NULL;
136 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
137 return NULL;
139 return use_stmt;
142 /* Check whether NAME, an ssa-name used in USE_STMT,
143 is a result of a type promotion or demotion, such that:
144 DEF_STMT: NAME = NOP (name0)
145 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
146 If CHECK_SIGN is TRUE, check that either both types are signed or both are
147 unsigned. */
149 static bool
150 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
151 tree *orig_type, gimple *def_stmt, bool *promotion)
153 tree dummy;
154 gimple dummy_gimple;
155 loop_vec_info loop_vinfo;
156 stmt_vec_info stmt_vinfo;
157 tree type = TREE_TYPE (name);
158 tree oprnd0;
159 enum vect_def_type dt;
160 tree def;
161 bb_vec_info bb_vinfo;
163 stmt_vinfo = vinfo_for_stmt (use_stmt);
164 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
165 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
166 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
167 &def, &dt))
168 return false;
170 if (dt != vect_internal_def
171 && dt != vect_external_def && dt != vect_constant_def)
172 return false;
174 if (!*def_stmt)
175 return false;
177 if (!is_gimple_assign (*def_stmt))
178 return false;
180 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
181 return false;
183 oprnd0 = gimple_assign_rhs1 (*def_stmt);
185 *orig_type = TREE_TYPE (oprnd0);
186 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
187 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
188 return false;
190 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
191 *promotion = true;
192 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
193 *promotion = false;
194 else
195 return false;
197 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
198 bb_vinfo, &dummy_gimple, &dummy, &dt))
199 return false;
201 return true;
204 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
205 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
207 static tree
208 vect_recog_temp_ssa_var (tree type, gimple stmt)
210 return make_temp_ssa_name (type, stmt, "patt");
213 /* Function vect_recog_dot_prod_pattern
215 Try to find the following pattern:
217 type x_t, y_t;
218 TYPE1 prod;
219 TYPE2 sum = init;
220 loop:
221 sum_0 = phi <init, sum_1>
222 S1 x_t = ...
223 S2 y_t = ...
224 S3 x_T = (TYPE1) x_t;
225 S4 y_T = (TYPE1) y_t;
226 S5 prod = x_T * y_T;
227 [S6 prod = (TYPE2) prod; #optional]
228 S7 sum_1 = prod + sum_0;
230 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
231 same size of 'TYPE1' or bigger. This is a special case of a reduction
232 computation.
234 Input:
236 * STMTS: Contains a stmt from which the pattern search begins. In the
237 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
238 will be detected.
240 Output:
242 * TYPE_IN: The type of the input arguments to the pattern.
244 * TYPE_OUT: The type of the output of this pattern.
246 * Return value: A new stmt that will be used to replace the sequence of
247 stmts that constitute the pattern. In this case it will be:
248 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
250 Note: The dot-prod idiom is a widening reduction pattern that is
251 vectorized without preserving all the intermediate results. It
252 produces only N/2 (widened) results (by summing up pairs of
253 intermediate results) rather than all N results. Therefore, we
254 cannot allow this pattern when we want to get all the results and in
255 the correct order (as is the case when this computation is in an
256 inner-loop nested in an outer-loop that us being vectorized). */
258 static gimple
259 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
260 tree *type_out)
262 gimple stmt, last_stmt = (*stmts)[0];
263 tree oprnd0, oprnd1;
264 tree oprnd00, oprnd01;
265 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
266 tree type, half_type;
267 gimple pattern_stmt;
268 tree prod_type;
269 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
270 struct loop *loop;
271 tree var;
272 bool promotion;
274 if (!loop_info)
275 return NULL;
277 loop = LOOP_VINFO_LOOP (loop_info);
279 if (!is_gimple_assign (last_stmt))
280 return NULL;
282 type = gimple_expr_type (last_stmt);
284 /* Look for the following pattern
285 DX = (TYPE1) X;
286 DY = (TYPE1) Y;
287 DPROD = DX * DY;
288 DDPROD = (TYPE2) DPROD;
289 sum_1 = DDPROD + sum_0;
290 In which
291 - DX is double the size of X
292 - DY is double the size of Y
293 - DX, DY, DPROD all have the same type
294 - sum is the same size of DPROD or bigger
295 - sum has been recognized as a reduction variable.
297 This is equivalent to:
298 DPROD = X w* Y; #widen mult
299 sum_1 = DPROD w+ sum_0; #widen summation
301 DPROD = X w* Y; #widen mult
302 sum_1 = DPROD + sum_0; #summation
305 /* Starting from LAST_STMT, follow the defs of its uses in search
306 of the above pattern. */
308 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
309 return NULL;
311 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
313 /* Has been detected as widening-summation? */
315 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
316 type = gimple_expr_type (stmt);
317 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
318 return NULL;
319 oprnd0 = gimple_assign_rhs1 (stmt);
320 oprnd1 = gimple_assign_rhs2 (stmt);
321 half_type = TREE_TYPE (oprnd0);
323 else
325 gimple def_stmt;
327 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
328 return NULL;
329 oprnd0 = gimple_assign_rhs1 (last_stmt);
330 oprnd1 = gimple_assign_rhs2 (last_stmt);
331 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
332 || !types_compatible_p (TREE_TYPE (oprnd1), type))
333 return NULL;
334 stmt = last_stmt;
336 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
337 &promotion)
338 && promotion)
340 stmt = def_stmt;
341 oprnd0 = gimple_assign_rhs1 (stmt);
343 else
344 half_type = type;
347 /* So far so good. Since last_stmt was detected as a (summation) reduction,
348 we know that oprnd1 is the reduction variable (defined by a loop-header
349 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
350 Left to check that oprnd0 is defined by a (widen_)mult_expr */
351 if (TREE_CODE (oprnd0) != SSA_NAME)
352 return NULL;
354 prod_type = half_type;
355 stmt = SSA_NAME_DEF_STMT (oprnd0);
357 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
358 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
359 return NULL;
361 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
362 inside the loop (in case we are analyzing an outer-loop). */
363 if (!is_gimple_assign (stmt))
364 return NULL;
365 stmt_vinfo = vinfo_for_stmt (stmt);
366 gcc_assert (stmt_vinfo);
367 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
368 return NULL;
369 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
370 return NULL;
371 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
373 /* Has been detected as a widening multiplication? */
375 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
376 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
377 return NULL;
378 stmt_vinfo = vinfo_for_stmt (stmt);
379 gcc_assert (stmt_vinfo);
380 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
381 oprnd00 = gimple_assign_rhs1 (stmt);
382 oprnd01 = gimple_assign_rhs2 (stmt);
384 else
386 tree half_type0, half_type1;
387 gimple def_stmt;
388 tree oprnd0, oprnd1;
390 oprnd0 = gimple_assign_rhs1 (stmt);
391 oprnd1 = gimple_assign_rhs2 (stmt);
392 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
393 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
394 return NULL;
395 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
396 &promotion)
397 || !promotion)
398 return NULL;
399 oprnd00 = gimple_assign_rhs1 (def_stmt);
400 if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt,
401 &promotion)
402 || !promotion)
403 return NULL;
404 oprnd01 = gimple_assign_rhs1 (def_stmt);
405 if (!types_compatible_p (half_type0, half_type1))
406 return NULL;
407 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
408 return NULL;
411 half_type = TREE_TYPE (oprnd00);
412 *type_in = half_type;
413 *type_out = type;
415 /* Pattern detected. Create a stmt to be used to replace the pattern: */
416 var = vect_recog_temp_ssa_var (type, NULL);
417 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
418 oprnd00, oprnd01, oprnd1);
420 if (dump_enabled_p ())
422 dump_printf_loc (MSG_NOTE, vect_location,
423 "vect_recog_dot_prod_pattern: detected: ");
424 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
427 /* We don't allow changing the order of the computation in the inner-loop
428 when doing outer-loop vectorization. */
429 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
431 return pattern_stmt;
435 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
436 and LSHIFT_EXPR.
438 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
439 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
441 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
442 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
443 that satisfies the above restrictions, we can perform a widening opeartion
444 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
445 with a_it = (interm_type) a_t; */
447 static bool
448 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
449 tree const_oprnd, tree *oprnd,
450 vec<gimple> *stmts, tree type,
451 tree *half_type, gimple def_stmt)
453 tree new_type, new_oprnd;
454 gimple new_stmt;
456 if (code != MULT_EXPR && code != LSHIFT_EXPR)
457 return false;
459 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
460 || (code == LSHIFT_EXPR
461 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
462 != 1))
463 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
465 /* CONST_OPRND is a constant of HALF_TYPE. */
466 *oprnd = gimple_assign_rhs1 (def_stmt);
467 return true;
470 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
471 return false;
473 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
474 return false;
476 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
477 a type 2 times bigger than HALF_TYPE. */
478 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
479 TYPE_UNSIGNED (type));
480 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
481 || (code == LSHIFT_EXPR
482 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
483 return false;
485 /* Use NEW_TYPE for widening operation. */
486 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
488 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
489 /* Check if the already created pattern stmt is what we need. */
490 if (!is_gimple_assign (new_stmt)
491 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
492 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
493 return false;
495 stmts->safe_push (def_stmt);
496 *oprnd = gimple_assign_lhs (new_stmt);
498 else
500 /* Create a_T = (NEW_TYPE) a_t; */
501 *oprnd = gimple_assign_rhs1 (def_stmt);
502 new_oprnd = make_ssa_name (new_type, NULL);
503 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
504 NULL_TREE);
505 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
506 stmts->safe_push (def_stmt);
507 *oprnd = new_oprnd;
510 *half_type = new_type;
511 return true;
515 /* Function vect_recog_widen_mult_pattern
517 Try to find the following pattern:
519 type a_t, b_t;
520 TYPE a_T, b_T, prod_T;
522 S1 a_t = ;
523 S2 b_t = ;
524 S3 a_T = (TYPE) a_t;
525 S4 b_T = (TYPE) b_t;
526 S5 prod_T = a_T * b_T;
528 where type 'TYPE' is at least double the size of type 'type'.
530 Also detect unsigned cases:
532 unsigned type a_t, b_t;
533 unsigned TYPE u_prod_T;
534 TYPE a_T, b_T, prod_T;
536 S1 a_t = ;
537 S2 b_t = ;
538 S3 a_T = (TYPE) a_t;
539 S4 b_T = (TYPE) b_t;
540 S5 prod_T = a_T * b_T;
541 S6 u_prod_T = (unsigned TYPE) prod_T;
543 and multiplication by constants:
545 type a_t;
546 TYPE a_T, prod_T;
548 S1 a_t = ;
549 S3 a_T = (TYPE) a_t;
550 S5 prod_T = a_T * CONST;
552 A special case of multiplication by constants is when 'TYPE' is 4 times
553 bigger than 'type', but CONST fits an intermediate type 2 times smaller
554 than 'TYPE'. In that case we create an additional pattern stmt for S3
555 to create a variable of the intermediate type, and perform widen-mult
556 on the intermediate type as well:
558 type a_t;
559 interm_type a_it;
560 TYPE a_T, prod_T, prod_T';
562 S1 a_t = ;
563 S3 a_T = (TYPE) a_t;
564 '--> a_it = (interm_type) a_t;
565 S5 prod_T = a_T * CONST;
566 '--> prod_T' = a_it w* CONST;
568 Input/Output:
570 * STMTS: Contains a stmt from which the pattern search begins. In the
571 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
572 is detected. In case of unsigned widen-mult, the original stmt (S5) is
573 replaced with S6 in STMTS. In case of multiplication by a constant
574 of an intermediate type (the last case above), STMTS also contains S3
575 (inserted before S5).
577 Output:
579 * TYPE_IN: The type of the input arguments to the pattern.
581 * TYPE_OUT: The type of the output of this pattern.
583 * Return value: A new stmt that will be used to replace the sequence of
584 stmts that constitute the pattern. In this case it will be:
585 WIDEN_MULT <a_t, b_t>
588 static gimple
589 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
590 tree *type_in, tree *type_out)
592 gimple last_stmt = stmts->pop ();
593 gimple def_stmt0, def_stmt1;
594 tree oprnd0, oprnd1;
595 tree type, half_type0, half_type1;
596 gimple pattern_stmt;
597 tree vectype, vectype_out = NULL_TREE;
598 tree var;
599 enum tree_code dummy_code;
600 int dummy_int;
601 vec<tree> dummy_vec;
602 bool op1_ok;
603 bool promotion;
605 if (!is_gimple_assign (last_stmt))
606 return NULL;
608 type = gimple_expr_type (last_stmt);
610 /* Starting from LAST_STMT, follow the defs of its uses in search
611 of the above pattern. */
613 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
614 return NULL;
616 oprnd0 = gimple_assign_rhs1 (last_stmt);
617 oprnd1 = gimple_assign_rhs2 (last_stmt);
618 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
619 || !types_compatible_p (TREE_TYPE (oprnd1), type))
620 return NULL;
622 /* Check argument 0. */
623 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
624 &promotion)
625 || !promotion)
626 return NULL;
627 /* Check argument 1. */
628 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
629 &def_stmt1, &promotion);
631 if (op1_ok && promotion)
633 oprnd0 = gimple_assign_rhs1 (def_stmt0);
634 oprnd1 = gimple_assign_rhs1 (def_stmt1);
636 else
638 if (TREE_CODE (oprnd1) == INTEGER_CST
639 && TREE_CODE (half_type0) == INTEGER_TYPE
640 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
641 &oprnd0, stmts, type,
642 &half_type0, def_stmt0))
643 half_type1 = half_type0;
644 else
645 return NULL;
648 /* Handle unsigned case. Look for
649 S6 u_prod_T = (unsigned TYPE) prod_T;
650 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
651 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
653 gimple use_stmt;
654 tree use_lhs;
655 tree use_type;
657 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
658 return NULL;
660 use_stmt = vect_single_imm_use (last_stmt);
661 if (!use_stmt || !is_gimple_assign (use_stmt)
662 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
663 return NULL;
665 use_lhs = gimple_assign_lhs (use_stmt);
666 use_type = TREE_TYPE (use_lhs);
667 if (!INTEGRAL_TYPE_P (use_type)
668 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
669 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
670 return NULL;
672 type = use_type;
673 last_stmt = use_stmt;
676 if (!types_compatible_p (half_type0, half_type1))
677 return NULL;
679 /* Pattern detected. */
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location,
682 "vect_recog_widen_mult_pattern: detected: ");
684 /* Check target support */
685 vectype = get_vectype_for_scalar_type (half_type0);
686 vectype_out = get_vectype_for_scalar_type (type);
687 if (!vectype
688 || !vectype_out
689 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
690 vectype_out, vectype,
691 &dummy_code, &dummy_code,
692 &dummy_int, &dummy_vec))
693 return NULL;
695 *type_in = vectype;
696 *type_out = vectype_out;
698 /* Pattern supported. Create a stmt to be used to replace the pattern: */
699 var = vect_recog_temp_ssa_var (type, NULL);
700 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
701 oprnd1);
703 if (dump_enabled_p ())
704 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
706 stmts->safe_push (last_stmt);
707 return pattern_stmt;
711 /* Function vect_recog_pow_pattern
713 Try to find the following pattern:
715 x = POW (y, N);
717 with POW being one of pow, powf, powi, powif and N being
718 either 2 or 0.5.
720 Input:
722 * LAST_STMT: A stmt from which the pattern search begins.
724 Output:
726 * TYPE_IN: The type of the input arguments to the pattern.
728 * TYPE_OUT: The type of the output of this pattern.
730 * Return value: A new stmt that will be used to replace the sequence of
731 stmts that constitute the pattern. In this case it will be:
732 x = x * x
734 x = sqrt (x)
737 static gimple
738 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
739 tree *type_out)
741 gimple last_stmt = (*stmts)[0];
742 tree fn, base, exp = NULL;
743 gimple stmt;
744 tree var;
746 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
747 return NULL;
749 fn = gimple_call_fndecl (last_stmt);
750 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
751 return NULL;
753 switch (DECL_FUNCTION_CODE (fn))
755 case BUILT_IN_POWIF:
756 case BUILT_IN_POWI:
757 case BUILT_IN_POWF:
758 case BUILT_IN_POW:
759 base = gimple_call_arg (last_stmt, 0);
760 exp = gimple_call_arg (last_stmt, 1);
761 if (TREE_CODE (exp) != REAL_CST
762 && TREE_CODE (exp) != INTEGER_CST)
763 return NULL;
764 break;
766 default:
767 return NULL;
770 /* We now have a pow or powi builtin function call with a constant
771 exponent. */
773 *type_out = NULL_TREE;
775 /* Catch squaring. */
776 if ((host_integerp (exp, 0)
777 && tree_low_cst (exp, 0) == 2)
778 || (TREE_CODE (exp) == REAL_CST
779 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
781 *type_in = TREE_TYPE (base);
783 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
784 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
785 return stmt;
788 /* Catch square root. */
789 if (TREE_CODE (exp) == REAL_CST
790 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
792 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
793 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
794 if (*type_in)
796 gimple stmt = gimple_build_call (newfn, 1, base);
797 if (vectorizable_function (stmt, *type_in, *type_in)
798 != NULL_TREE)
800 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
801 gimple_call_set_lhs (stmt, var);
802 return stmt;
807 return NULL;
811 /* Function vect_recog_widen_sum_pattern
813 Try to find the following pattern:
815 type x_t;
816 TYPE x_T, sum = init;
817 loop:
818 sum_0 = phi <init, sum_1>
819 S1 x_t = *p;
820 S2 x_T = (TYPE) x_t;
821 S3 sum_1 = x_T + sum_0;
823 where type 'TYPE' is at least double the size of type 'type', i.e - we're
824 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
825 a special case of a reduction computation.
827 Input:
829 * LAST_STMT: A stmt from which the pattern search begins. In the example,
830 when this function is called with S3, the pattern {S2,S3} will be detected.
832 Output:
834 * TYPE_IN: The type of the input arguments to the pattern.
836 * TYPE_OUT: The type of the output of this pattern.
838 * Return value: A new stmt that will be used to replace the sequence of
839 stmts that constitute the pattern. In this case it will be:
840 WIDEN_SUM <x_t, sum_0>
842 Note: The widening-sum idiom is a widening reduction pattern that is
843 vectorized without preserving all the intermediate results. It
844 produces only N/2 (widened) results (by summing up pairs of
845 intermediate results) rather than all N results. Therefore, we
846 cannot allow this pattern when we want to get all the results and in
847 the correct order (as is the case when this computation is in an
848 inner-loop nested in an outer-loop that us being vectorized). */
850 static gimple
851 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
852 tree *type_out)
854 gimple stmt, last_stmt = (*stmts)[0];
855 tree oprnd0, oprnd1;
856 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
857 tree type, half_type;
858 gimple pattern_stmt;
859 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
860 struct loop *loop;
861 tree var;
862 bool promotion;
864 if (!loop_info)
865 return NULL;
867 loop = LOOP_VINFO_LOOP (loop_info);
869 if (!is_gimple_assign (last_stmt))
870 return NULL;
872 type = gimple_expr_type (last_stmt);
874 /* Look for the following pattern
875 DX = (TYPE) X;
876 sum_1 = DX + sum_0;
877 In which DX is at least double the size of X, and sum_1 has been
878 recognized as a reduction variable.
881 /* Starting from LAST_STMT, follow the defs of its uses in search
882 of the above pattern. */
884 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
885 return NULL;
887 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
888 return NULL;
890 oprnd0 = gimple_assign_rhs1 (last_stmt);
891 oprnd1 = gimple_assign_rhs2 (last_stmt);
892 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
893 || !types_compatible_p (TREE_TYPE (oprnd1), type))
894 return NULL;
896 /* So far so good. Since last_stmt was detected as a (summation) reduction,
897 we know that oprnd1 is the reduction variable (defined by a loop-header
898 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
899 Left to check that oprnd0 is defined by a cast from type 'type' to type
900 'TYPE'. */
902 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
903 &promotion)
904 || !promotion)
905 return NULL;
907 oprnd0 = gimple_assign_rhs1 (stmt);
908 *type_in = half_type;
909 *type_out = type;
911 /* Pattern detected. Create a stmt to be used to replace the pattern: */
912 var = vect_recog_temp_ssa_var (type, NULL);
913 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
914 oprnd0, oprnd1);
916 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_recog_widen_sum_pattern: detected: ");
920 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
923 /* We don't allow changing the order of the computation in the inner-loop
924 when doing outer-loop vectorization. */
925 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
927 return pattern_stmt;
931 /* Return TRUE if the operation in STMT can be performed on a smaller type.
933 Input:
934 STMT - a statement to check.
935 DEF - we support operations with two operands, one of which is constant.
936 The other operand can be defined by a demotion operation, or by a
937 previous statement in a sequence of over-promoted operations. In the
938 later case DEF is used to replace that operand. (It is defined by a
939 pattern statement we created for the previous statement in the
940 sequence).
942 Input/output:
943 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
944 NULL, it's the type of DEF.
945 STMTS - additional pattern statements. If a pattern statement (type
946 conversion) is created in this function, its original statement is
947 added to STMTS.
949 Output:
950 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
951 operands to use in the new pattern statement for STMT (will be created
952 in vect_recog_over_widening_pattern ()).
953 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
954 statements for STMT: the first one is a type promotion and the second
955 one is the operation itself. We return the type promotion statement
956 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
957 the second pattern statement. */
959 static bool
960 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
961 tree *op0, tree *op1, gimple *new_def_stmt,
962 vec<gimple> *stmts)
964 enum tree_code code;
965 tree const_oprnd, oprnd;
966 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
967 gimple def_stmt, new_stmt;
968 bool first = false;
969 bool promotion;
971 *op0 = NULL_TREE;
972 *op1 = NULL_TREE;
973 *new_def_stmt = NULL;
975 if (!is_gimple_assign (stmt))
976 return false;
978 code = gimple_assign_rhs_code (stmt);
979 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
980 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
981 return false;
983 oprnd = gimple_assign_rhs1 (stmt);
984 const_oprnd = gimple_assign_rhs2 (stmt);
985 type = gimple_expr_type (stmt);
987 if (TREE_CODE (oprnd) != SSA_NAME
988 || TREE_CODE (const_oprnd) != INTEGER_CST)
989 return false;
991 /* If oprnd has other uses besides that in stmt we cannot mark it
992 as being part of a pattern only. */
993 if (!has_single_use (oprnd))
994 return false;
996 /* If we are in the middle of a sequence, we use DEF from a previous
997 statement. Otherwise, OPRND has to be a result of type promotion. */
998 if (*new_type)
1000 half_type = *new_type;
1001 oprnd = def;
1003 else
1005 first = true;
1006 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1007 &promotion)
1008 || !promotion
1009 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1010 return false;
1013 /* Can we perform the operation on a smaller type? */
1014 switch (code)
1016 case BIT_IOR_EXPR:
1017 case BIT_XOR_EXPR:
1018 case BIT_AND_EXPR:
1019 if (!int_fits_type_p (const_oprnd, half_type))
1021 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1022 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1023 return false;
1025 interm_type = build_nonstandard_integer_type (
1026 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1027 if (!int_fits_type_p (const_oprnd, interm_type))
1028 return false;
1031 break;
1033 case LSHIFT_EXPR:
1034 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1035 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1036 return false;
1038 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1039 (e.g., if the original value was char, the shift amount is at most 8
1040 if we want to use short). */
1041 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1042 return false;
1044 interm_type = build_nonstandard_integer_type (
1045 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1047 if (!vect_supportable_shift (code, interm_type))
1048 return false;
1050 break;
1052 case RSHIFT_EXPR:
1053 if (vect_supportable_shift (code, half_type))
1054 break;
1056 /* Try intermediate type - HALF_TYPE is not supported. */
1057 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1058 return false;
1060 interm_type = build_nonstandard_integer_type (
1061 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1063 if (!vect_supportable_shift (code, interm_type))
1064 return false;
1066 break;
1068 default:
1069 gcc_unreachable ();
1072 /* There are four possible cases:
1073 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1074 the first statement in the sequence)
1075 a. The original, HALF_TYPE, is not enough - we replace the promotion
1076 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1077 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1078 promotion.
1079 2. OPRND is defined by a pattern statement we created.
1080 a. Its type is not sufficient for the operation, we create a new stmt:
1081 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1082 this statement in NEW_DEF_STMT, and it is later put in
1083 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1084 b. OPRND is good to use in the new statement. */
1085 if (first)
1087 if (interm_type)
1089 /* Replace the original type conversion HALF_TYPE->TYPE with
1090 HALF_TYPE->INTERM_TYPE. */
1091 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1093 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1094 /* Check if the already created pattern stmt is what we need. */
1095 if (!is_gimple_assign (new_stmt)
1096 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1097 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1098 return false;
1100 stmts->safe_push (def_stmt);
1101 oprnd = gimple_assign_lhs (new_stmt);
1103 else
1105 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1106 oprnd = gimple_assign_rhs1 (def_stmt);
1107 new_oprnd = make_ssa_name (interm_type, NULL);
1108 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1109 oprnd, NULL_TREE);
1110 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1111 stmts->safe_push (def_stmt);
1112 oprnd = new_oprnd;
1115 else
1117 /* Retrieve the operand before the type promotion. */
1118 oprnd = gimple_assign_rhs1 (def_stmt);
1121 else
1123 if (interm_type)
1125 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1126 new_oprnd = make_ssa_name (interm_type, NULL);
1127 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1128 oprnd, NULL_TREE);
1129 oprnd = new_oprnd;
1130 *new_def_stmt = new_stmt;
1133 /* Otherwise, OPRND is already set. */
1136 if (interm_type)
1137 *new_type = interm_type;
1138 else
1139 *new_type = half_type;
1141 *op0 = oprnd;
1142 *op1 = fold_convert (*new_type, const_oprnd);
1144 return true;
1148 /* Try to find a statement or a sequence of statements that can be performed
1149 on a smaller type:
1151 type x_t;
1152 TYPE x_T, res0_T, res1_T;
1153 loop:
1154 S1 x_t = *p;
1155 S2 x_T = (TYPE) x_t;
1156 S3 res0_T = op (x_T, C0);
1157 S4 res1_T = op (res0_T, C1);
1158 S5 ... = () res1_T; - type demotion
1160 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1161 constants.
1162 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1163 be 'type' or some intermediate type. For now, we expect S5 to be a type
1164 demotion operation. We also check that S3 and S4 have only one use. */
1166 static gimple
1167 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1168 tree *type_in, tree *type_out)
1170 gimple stmt = stmts->pop ();
1171 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1172 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1173 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1174 bool first;
1175 tree type = NULL;
1177 first = true;
1178 while (1)
1180 if (!vinfo_for_stmt (stmt)
1181 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1182 return NULL;
1184 new_def_stmt = NULL;
1185 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1186 &op0, &op1, &new_def_stmt,
1187 stmts))
1189 if (first)
1190 return NULL;
1191 else
1192 break;
1195 /* STMT can be performed on a smaller type. Check its uses. */
1196 use_stmt = vect_single_imm_use (stmt);
1197 if (!use_stmt || !is_gimple_assign (use_stmt))
1198 return NULL;
1200 /* Create pattern statement for STMT. */
1201 vectype = get_vectype_for_scalar_type (new_type);
1202 if (!vectype)
1203 return NULL;
1205 /* We want to collect all the statements for which we create pattern
1206 statetments, except for the case when the last statement in the
1207 sequence doesn't have a corresponding pattern statement. In such
1208 case we associate the last pattern statement with the last statement
1209 in the sequence. Therefore, we only add the original statement to
1210 the list if we know that it is not the last. */
1211 if (prev_stmt)
1212 stmts->safe_push (prev_stmt);
1214 var = vect_recog_temp_ssa_var (new_type, NULL);
1215 pattern_stmt
1216 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1217 op0, op1);
1218 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1219 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1221 if (dump_enabled_p ())
1223 dump_printf_loc (MSG_NOTE, vect_location,
1224 "created pattern stmt: ");
1225 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1228 type = gimple_expr_type (stmt);
1229 prev_stmt = stmt;
1230 stmt = use_stmt;
1232 first = false;
1235 /* We got a sequence. We expect it to end with a type demotion operation.
1236 Otherwise, we quit (for now). There are three possible cases: the
1237 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1238 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1239 NEW_TYPE differs (we create a new conversion statement). */
1240 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1242 use_lhs = gimple_assign_lhs (use_stmt);
1243 use_type = TREE_TYPE (use_lhs);
1244 /* Support only type demotion or signedess change. */
1245 if (!INTEGRAL_TYPE_P (use_type)
1246 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1247 return NULL;
1249 /* Check that NEW_TYPE is not bigger than the conversion result. */
1250 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1251 return NULL;
1253 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1254 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1256 /* Create NEW_TYPE->USE_TYPE conversion. */
1257 new_oprnd = make_ssa_name (use_type, NULL);
1258 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1259 var, NULL_TREE);
1260 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1262 *type_in = get_vectype_for_scalar_type (new_type);
1263 *type_out = get_vectype_for_scalar_type (use_type);
1265 /* We created a pattern statement for the last statement in the
1266 sequence, so we don't need to associate it with the pattern
1267 statement created for PREV_STMT. Therefore, we add PREV_STMT
1268 to the list in order to mark it later in vect_pattern_recog_1. */
1269 if (prev_stmt)
1270 stmts->safe_push (prev_stmt);
1272 else
1274 if (prev_stmt)
1275 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1276 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1278 *type_in = vectype;
1279 *type_out = NULL_TREE;
1282 stmts->safe_push (use_stmt);
1284 else
1285 /* TODO: support general case, create a conversion to the correct type. */
1286 return NULL;
1288 /* Pattern detected. */
1289 if (dump_enabled_p ())
1291 dump_printf_loc (MSG_NOTE, vect_location,
1292 "vect_recog_over_widening_pattern: detected: ");
1293 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1296 return pattern_stmt;
1299 /* Detect widening shift pattern:
1301 type a_t;
1302 TYPE a_T, res_T;
1304 S1 a_t = ;
1305 S2 a_T = (TYPE) a_t;
1306 S3 res_T = a_T << CONST;
1308 where type 'TYPE' is at least double the size of type 'type'.
1310 Also detect cases where the shift result is immediately converted
1311 to another type 'result_type' that is no larger in size than 'TYPE'.
1312 In those cases we perform a widen-shift that directly results in
1313 'result_type', to avoid a possible over-widening situation:
1315 type a_t;
1316 TYPE a_T, res_T;
1317 result_type res_result;
1319 S1 a_t = ;
1320 S2 a_T = (TYPE) a_t;
1321 S3 res_T = a_T << CONST;
1322 S4 res_result = (result_type) res_T;
1323 '--> res_result' = a_t w<< CONST;
1325 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1326 create an additional pattern stmt for S2 to create a variable of an
1327 intermediate type, and perform widen-shift on the intermediate type:
1329 type a_t;
1330 interm_type a_it;
1331 TYPE a_T, res_T, res_T';
1333 S1 a_t = ;
1334 S2 a_T = (TYPE) a_t;
1335 '--> a_it = (interm_type) a_t;
1336 S3 res_T = a_T << CONST;
1337 '--> res_T' = a_it <<* CONST;
1339 Input/Output:
1341 * STMTS: Contains a stmt from which the pattern search begins.
1342 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1343 in STMTS. When an intermediate type is used and a pattern statement is
1344 created for S2, we also put S2 here (before S3).
1346 Output:
1348 * TYPE_IN: The type of the input arguments to the pattern.
1350 * TYPE_OUT: The type of the output of this pattern.
1352 * Return value: A new stmt that will be used to replace the sequence of
1353 stmts that constitute the pattern. In this case it will be:
1354 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1356 static gimple
1357 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1358 tree *type_in, tree *type_out)
1360 gimple last_stmt = stmts->pop ();
1361 gimple def_stmt0;
1362 tree oprnd0, oprnd1;
1363 tree type, half_type0;
1364 gimple pattern_stmt;
1365 tree vectype, vectype_out = NULL_TREE;
1366 tree var;
1367 enum tree_code dummy_code;
1368 int dummy_int;
1369 vec<tree> dummy_vec;
1370 gimple use_stmt;
1371 bool promotion;
1373 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1374 return NULL;
1376 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1377 return NULL;
1379 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1380 return NULL;
1382 oprnd0 = gimple_assign_rhs1 (last_stmt);
1383 oprnd1 = gimple_assign_rhs2 (last_stmt);
1384 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1385 return NULL;
1387 /* Check operand 0: it has to be defined by a type promotion. */
1388 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1389 &promotion)
1390 || !promotion)
1391 return NULL;
1393 /* Check operand 1: has to be positive. We check that it fits the type
1394 in vect_handle_widen_op_by_const (). */
1395 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1396 return NULL;
1398 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1399 type = gimple_expr_type (last_stmt);
1401 /* Check for subsequent conversion to another type. */
1402 use_stmt = vect_single_imm_use (last_stmt);
1403 if (use_stmt && is_gimple_assign (use_stmt)
1404 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1405 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1407 tree use_lhs = gimple_assign_lhs (use_stmt);
1408 tree use_type = TREE_TYPE (use_lhs);
1410 if (INTEGRAL_TYPE_P (use_type)
1411 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1413 last_stmt = use_stmt;
1414 type = use_type;
1418 /* Check if this a widening operation. */
1419 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1420 &oprnd0, stmts,
1421 type, &half_type0, def_stmt0))
1422 return NULL;
1424 /* Pattern detected. */
1425 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "vect_recog_widen_shift_pattern: detected: ");
1429 /* Check target support. */
1430 vectype = get_vectype_for_scalar_type (half_type0);
1431 vectype_out = get_vectype_for_scalar_type (type);
1433 if (!vectype
1434 || !vectype_out
1435 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1436 vectype_out, vectype,
1437 &dummy_code, &dummy_code,
1438 &dummy_int, &dummy_vec))
1439 return NULL;
1441 *type_in = vectype;
1442 *type_out = vectype_out;
1444 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1445 var = vect_recog_temp_ssa_var (type, NULL);
1446 pattern_stmt =
1447 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1449 if (dump_enabled_p ())
1450 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1452 stmts->safe_push (last_stmt);
1453 return pattern_stmt;
1456 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1458 type a_t, b_t, c_t;
1460 S0 a_t = b_t r<< c_t;
1462 Input/Output:
1464 * STMTS: Contains a stmt from which the pattern search begins,
1465 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1466 with a sequence:
1468 S1 d_t = -c_t;
1469 S2 e_t = d_t & (B - 1);
1470 S3 f_t = b_t << c_t;
1471 S4 g_t = b_t >> e_t;
1472 S0 a_t = f_t | g_t;
1474 where B is element bitsize of type.
1476 Output:
1478 * TYPE_IN: The type of the input arguments to the pattern.
1480 * TYPE_OUT: The type of the output of this pattern.
1482 * Return value: A new stmt that will be used to replace the rotate
1483 S0 stmt. */
1485 static gimple
1486 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1488 gimple last_stmt = stmts->pop ();
1489 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1490 gimple pattern_stmt, def_stmt;
1491 enum tree_code rhs_code;
1492 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1493 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1494 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1495 enum vect_def_type dt;
1496 optab optab1, optab2;
1497 edge ext_def = NULL;
1499 if (!is_gimple_assign (last_stmt))
1500 return NULL;
1502 rhs_code = gimple_assign_rhs_code (last_stmt);
1503 switch (rhs_code)
1505 case LROTATE_EXPR:
1506 case RROTATE_EXPR:
1507 break;
1508 default:
1509 return NULL;
1512 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1513 return NULL;
1515 lhs = gimple_assign_lhs (last_stmt);
1516 oprnd0 = gimple_assign_rhs1 (last_stmt);
1517 type = TREE_TYPE (oprnd0);
1518 oprnd1 = gimple_assign_rhs2 (last_stmt);
1519 if (TREE_CODE (oprnd0) != SSA_NAME
1520 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1521 || !INTEGRAL_TYPE_P (type)
1522 || !TYPE_UNSIGNED (type))
1523 return NULL;
1525 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1526 &def, &dt))
1527 return NULL;
1529 if (dt != vect_internal_def
1530 && dt != vect_constant_def
1531 && dt != vect_external_def)
1532 return NULL;
1534 vectype = get_vectype_for_scalar_type (type);
1535 if (vectype == NULL_TREE)
1536 return NULL;
1538 /* If vector/vector or vector/scalar rotate is supported by the target,
1539 don't do anything here. */
1540 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1541 if (optab1
1542 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1543 return NULL;
1545 if (bb_vinfo != NULL || dt != vect_internal_def)
1547 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1548 if (optab2
1549 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1550 return NULL;
1553 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1554 don't do anything here either. */
1555 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1556 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1557 if (!optab1
1558 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1559 || !optab2
1560 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1562 if (bb_vinfo == NULL && dt == vect_internal_def)
1563 return NULL;
1564 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1565 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1566 if (!optab1
1567 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1568 || !optab2
1569 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1570 return NULL;
1573 *type_in = vectype;
1574 *type_out = vectype;
1575 if (*type_in == NULL_TREE)
1576 return NULL;
1578 if (dt == vect_external_def
1579 && TREE_CODE (oprnd1) == SSA_NAME
1580 && loop_vinfo)
1582 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1583 ext_def = loop_preheader_edge (loop);
1584 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1586 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1587 if (bb == NULL
1588 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1589 ext_def = NULL;
1593 def = NULL_TREE;
1594 if (TREE_CODE (oprnd1) == INTEGER_CST
1595 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1596 def = oprnd1;
1597 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1599 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1600 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1601 && TYPE_PRECISION (TREE_TYPE (rhs1))
1602 == TYPE_PRECISION (type))
1603 def = rhs1;
1606 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1607 if (def == NULL_TREE)
1609 def = vect_recog_temp_ssa_var (type, NULL);
1610 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1611 NULL_TREE);
1612 if (ext_def)
1614 basic_block new_bb
1615 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1616 gcc_assert (!new_bb);
1618 else
1619 append_pattern_def_seq (stmt_vinfo, def_stmt);
1621 stype = TREE_TYPE (def);
1623 if (TREE_CODE (def) == INTEGER_CST)
1625 if (!host_integerp (def, 1)
1626 || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1)
1627 >= GET_MODE_PRECISION (TYPE_MODE (type))
1628 || integer_zerop (def))
1629 return NULL;
1630 def2 = build_int_cst (stype,
1631 GET_MODE_PRECISION (TYPE_MODE (type))
1632 - tree_low_cst (def, 1));
1634 else
1636 tree vecstype = get_vectype_for_scalar_type (stype);
1637 stmt_vec_info def_stmt_vinfo;
1639 if (vecstype == NULL_TREE)
1640 return NULL;
1641 def2 = vect_recog_temp_ssa_var (stype, NULL);
1642 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1643 NULL_TREE);
1644 if (ext_def)
1646 basic_block new_bb
1647 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1648 gcc_assert (!new_bb);
1650 else
1652 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1653 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1654 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1655 append_pattern_def_seq (stmt_vinfo, def_stmt);
1658 def2 = vect_recog_temp_ssa_var (stype, NULL);
1659 tree mask
1660 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1661 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1662 gimple_assign_lhs (def_stmt),
1663 mask);
1664 if (ext_def)
1666 basic_block new_bb
1667 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1668 gcc_assert (!new_bb);
1670 else
1672 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1673 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1674 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1675 append_pattern_def_seq (stmt_vinfo, def_stmt);
1679 var1 = vect_recog_temp_ssa_var (type, NULL);
1680 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1681 ? LSHIFT_EXPR : RSHIFT_EXPR,
1682 var1, oprnd0, def);
1683 append_pattern_def_seq (stmt_vinfo, def_stmt);
1685 var2 = vect_recog_temp_ssa_var (type, NULL);
1686 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1687 ? RSHIFT_EXPR : LSHIFT_EXPR,
1688 var2, oprnd0, def2);
1689 append_pattern_def_seq (stmt_vinfo, def_stmt);
1691 /* Pattern detected. */
1692 if (dump_enabled_p ())
1693 dump_printf_loc (MSG_NOTE, vect_location,
1694 "vect_recog_rotate_pattern: detected: ");
1696 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1697 var = vect_recog_temp_ssa_var (type, NULL);
1698 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
1700 if (dump_enabled_p ())
1701 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1703 stmts->safe_push (last_stmt);
1704 return pattern_stmt;
1707 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1708 vectorized:
1710 type a_t;
1711 TYPE b_T, res_T;
1713 S1 a_t = ;
1714 S2 b_T = ;
1715 S3 res_T = b_T op a_t;
1717 where type 'TYPE' is a type with different size than 'type',
1718 and op is <<, >> or rotate.
1720 Also detect cases:
1722 type a_t;
1723 TYPE b_T, c_T, res_T;
1725 S0 c_T = ;
1726 S1 a_t = (type) c_T;
1727 S2 b_T = ;
1728 S3 res_T = b_T op a_t;
1730 Input/Output:
1732 * STMTS: Contains a stmt from which the pattern search begins,
1733 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1734 with a shift/rotate which has same type on both operands, in the
1735 second case just b_T op c_T, in the first case with added cast
1736 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1738 Output:
1740 * TYPE_IN: The type of the input arguments to the pattern.
1742 * TYPE_OUT: The type of the output of this pattern.
1744 * Return value: A new stmt that will be used to replace the shift/rotate
1745 S3 stmt. */
1747 static gimple
1748 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1749 tree *type_in, tree *type_out)
1751 gimple last_stmt = stmts->pop ();
1752 tree oprnd0, oprnd1, lhs, var;
1753 gimple pattern_stmt, def_stmt;
1754 enum tree_code rhs_code;
1755 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1756 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1757 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1758 enum vect_def_type dt;
1759 tree def;
1761 if (!is_gimple_assign (last_stmt))
1762 return NULL;
1764 rhs_code = gimple_assign_rhs_code (last_stmt);
1765 switch (rhs_code)
1767 case LSHIFT_EXPR:
1768 case RSHIFT_EXPR:
1769 case LROTATE_EXPR:
1770 case RROTATE_EXPR:
1771 break;
1772 default:
1773 return NULL;
1776 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1777 return NULL;
1779 lhs = gimple_assign_lhs (last_stmt);
1780 oprnd0 = gimple_assign_rhs1 (last_stmt);
1781 oprnd1 = gimple_assign_rhs2 (last_stmt);
1782 if (TREE_CODE (oprnd0) != SSA_NAME
1783 || TREE_CODE (oprnd1) != SSA_NAME
1784 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1785 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1786 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1787 || TYPE_PRECISION (TREE_TYPE (lhs))
1788 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1789 return NULL;
1791 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1792 &def, &dt))
1793 return NULL;
1795 if (dt != vect_internal_def)
1796 return NULL;
1798 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1799 *type_out = *type_in;
1800 if (*type_in == NULL_TREE)
1801 return NULL;
1803 def = NULL_TREE;
1804 if (gimple_assign_cast_p (def_stmt))
1806 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1807 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1808 && TYPE_PRECISION (TREE_TYPE (rhs1))
1809 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1810 def = rhs1;
1813 if (def == NULL_TREE)
1815 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1816 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1817 NULL_TREE);
1818 new_pattern_def_seq (stmt_vinfo, def_stmt);
1821 /* Pattern detected. */
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_NOTE, vect_location,
1824 "vect_recog_vector_vector_shift_pattern: detected: ");
1826 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1827 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1828 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1830 if (dump_enabled_p ())
1831 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1833 stmts->safe_push (last_stmt);
1834 return pattern_stmt;
1837 /* Detect a signed division by a constant that wouldn't be
1838 otherwise vectorized:
1840 type a_t, b_t;
1842 S1 a_t = b_t / N;
1844 where type 'type' is an integral type and N is a constant.
1846 Similarly handle modulo by a constant:
1848 S4 a_t = b_t % N;
1850 Input/Output:
1852 * STMTS: Contains a stmt from which the pattern search begins,
1853 i.e. the division stmt. S1 is replaced by if N is a power
1854 of two constant and type is signed:
1855 S3 y_t = b_t < 0 ? N - 1 : 0;
1856 S2 x_t = b_t + y_t;
1857 S1' a_t = x_t >> log2 (N);
1859 S4 is replaced if N is a power of two constant and
1860 type is signed by (where *_T temporaries have unsigned type):
1861 S9 y_T = b_t < 0 ? -1U : 0U;
1862 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1863 S7 z_t = (type) z_T;
1864 S6 w_t = b_t + z_t;
1865 S5 x_t = w_t & (N - 1);
1866 S4' a_t = x_t - z_t;
1868 Output:
1870 * TYPE_IN: The type of the input arguments to the pattern.
1872 * TYPE_OUT: The type of the output of this pattern.
1874 * Return value: A new stmt that will be used to replace the division
1875 S1 or modulo S4 stmt. */
1877 static gimple
1878 vect_recog_divmod_pattern (vec<gimple> *stmts,
1879 tree *type_in, tree *type_out)
1881 gimple last_stmt = stmts->pop ();
1882 tree oprnd0, oprnd1, vectype, itype, cond;
1883 gimple pattern_stmt, def_stmt;
1884 enum tree_code rhs_code;
1885 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1886 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1887 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1888 optab optab;
1889 tree q;
1890 int dummy_int, prec;
1891 stmt_vec_info def_stmt_vinfo;
1893 if (!is_gimple_assign (last_stmt))
1894 return NULL;
1896 rhs_code = gimple_assign_rhs_code (last_stmt);
1897 switch (rhs_code)
1899 case TRUNC_DIV_EXPR:
1900 case TRUNC_MOD_EXPR:
1901 break;
1902 default:
1903 return NULL;
1906 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1907 return NULL;
1909 oprnd0 = gimple_assign_rhs1 (last_stmt);
1910 oprnd1 = gimple_assign_rhs2 (last_stmt);
1911 itype = TREE_TYPE (oprnd0);
1912 if (TREE_CODE (oprnd0) != SSA_NAME
1913 || TREE_CODE (oprnd1) != INTEGER_CST
1914 || TREE_CODE (itype) != INTEGER_TYPE
1915 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1916 return NULL;
1918 vectype = get_vectype_for_scalar_type (itype);
1919 if (vectype == NULL_TREE)
1920 return NULL;
1922 /* If the target can handle vectorized division or modulo natively,
1923 don't attempt to optimize this. */
1924 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1925 if (optab != unknown_optab)
1927 enum machine_mode vec_mode = TYPE_MODE (vectype);
1928 int icode = (int) optab_handler (optab, vec_mode);
1929 if (icode != CODE_FOR_nothing)
1930 return NULL;
1933 prec = TYPE_PRECISION (itype);
1934 if (integer_pow2p (oprnd1))
1936 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1937 return NULL;
1939 /* Pattern detected. */
1940 if (dump_enabled_p ())
1941 dump_printf_loc (MSG_NOTE, vect_location,
1942 "vect_recog_divmod_pattern: detected: ");
1944 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1945 build_int_cst (itype, 0));
1946 if (rhs_code == TRUNC_DIV_EXPR)
1948 tree var = vect_recog_temp_ssa_var (itype, NULL);
1949 tree shift;
1950 def_stmt
1951 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1952 fold_build2 (MINUS_EXPR, itype,
1953 oprnd1,
1954 build_int_cst (itype,
1955 1)),
1956 build_int_cst (itype, 0));
1957 new_pattern_def_seq (stmt_vinfo, def_stmt);
1958 var = vect_recog_temp_ssa_var (itype, NULL);
1959 def_stmt
1960 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1961 gimple_assign_lhs (def_stmt));
1962 append_pattern_def_seq (stmt_vinfo, def_stmt);
1964 shift = build_int_cst (itype, tree_log2 (oprnd1));
1965 pattern_stmt
1966 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1967 vect_recog_temp_ssa_var (itype,
1968 NULL),
1969 var, shift);
1971 else
1973 tree signmask;
1974 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1975 if (compare_tree_int (oprnd1, 2) == 0)
1977 signmask = vect_recog_temp_ssa_var (itype, NULL);
1978 def_stmt
1979 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1980 build_int_cst (itype, 1),
1981 build_int_cst (itype, 0));
1982 append_pattern_def_seq (stmt_vinfo, def_stmt);
1984 else
1986 tree utype
1987 = build_nonstandard_integer_type (prec, 1);
1988 tree vecutype = get_vectype_for_scalar_type (utype);
1989 tree shift
1990 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1991 - tree_log2 (oprnd1));
1992 tree var = vect_recog_temp_ssa_var (utype, NULL);
1994 def_stmt
1995 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1996 build_int_cst (utype, -1),
1997 build_int_cst (utype, 0));
1998 def_stmt_vinfo
1999 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2000 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2001 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2002 append_pattern_def_seq (stmt_vinfo, def_stmt);
2003 var = vect_recog_temp_ssa_var (utype, NULL);
2004 def_stmt
2005 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2006 gimple_assign_lhs (def_stmt),
2007 shift);
2008 def_stmt_vinfo
2009 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2010 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2011 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2012 append_pattern_def_seq (stmt_vinfo, def_stmt);
2013 signmask = vect_recog_temp_ssa_var (itype, NULL);
2014 def_stmt
2015 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2016 NULL_TREE);
2017 append_pattern_def_seq (stmt_vinfo, def_stmt);
2019 def_stmt
2020 = gimple_build_assign_with_ops (PLUS_EXPR,
2021 vect_recog_temp_ssa_var (itype,
2022 NULL),
2023 oprnd0, signmask);
2024 append_pattern_def_seq (stmt_vinfo, def_stmt);
2025 def_stmt
2026 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2027 vect_recog_temp_ssa_var (itype,
2028 NULL),
2029 gimple_assign_lhs (def_stmt),
2030 fold_build2 (MINUS_EXPR, itype,
2031 oprnd1,
2032 build_int_cst (itype,
2033 1)));
2034 append_pattern_def_seq (stmt_vinfo, def_stmt);
2036 pattern_stmt
2037 = gimple_build_assign_with_ops (MINUS_EXPR,
2038 vect_recog_temp_ssa_var (itype,
2039 NULL),
2040 gimple_assign_lhs (def_stmt),
2041 signmask);
2044 if (dump_enabled_p ())
2045 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2048 stmts->safe_push (last_stmt);
2050 *type_in = vectype;
2051 *type_out = vectype;
2052 return pattern_stmt;
2055 if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
2056 || integer_zerop (oprnd1)
2057 || prec > HOST_BITS_PER_WIDE_INT)
2058 return NULL;
2060 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2061 return NULL;
2063 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2065 if (TYPE_UNSIGNED (itype))
2067 unsigned HOST_WIDE_INT mh, ml;
2068 int pre_shift, post_shift;
2069 unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
2070 & GET_MODE_MASK (TYPE_MODE (itype));
2071 tree t1, t2, t3, t4;
2073 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2074 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2075 return NULL;
2077 /* Find a suitable multiplier and right shift count
2078 instead of multiplying with D. */
2079 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2081 /* If the suggested multiplier is more than SIZE bits, we can do better
2082 for even divisors, using an initial right shift. */
2083 if (mh != 0 && (d & 1) == 0)
2085 pre_shift = floor_log2 (d & -d);
2086 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2087 &ml, &post_shift, &dummy_int);
2088 gcc_assert (!mh);
2090 else
2091 pre_shift = 0;
2093 if (mh != 0)
2095 if (post_shift - 1 >= prec)
2096 return NULL;
2098 /* t1 = oprnd0 h* ml;
2099 t2 = oprnd0 - t1;
2100 t3 = t2 >> 1;
2101 t4 = t1 + t3;
2102 q = t4 >> (post_shift - 1); */
2103 t1 = vect_recog_temp_ssa_var (itype, NULL);
2104 def_stmt
2105 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2106 build_int_cst (itype, ml));
2107 append_pattern_def_seq (stmt_vinfo, def_stmt);
2109 t2 = vect_recog_temp_ssa_var (itype, NULL);
2110 def_stmt
2111 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2112 append_pattern_def_seq (stmt_vinfo, def_stmt);
2114 t3 = vect_recog_temp_ssa_var (itype, NULL);
2115 def_stmt
2116 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2117 integer_one_node);
2118 append_pattern_def_seq (stmt_vinfo, def_stmt);
2120 t4 = vect_recog_temp_ssa_var (itype, NULL);
2121 def_stmt
2122 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2124 if (post_shift != 1)
2126 append_pattern_def_seq (stmt_vinfo, def_stmt);
2128 q = vect_recog_temp_ssa_var (itype, NULL);
2129 pattern_stmt
2130 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2131 build_int_cst (itype,
2132 post_shift
2133 - 1));
2135 else
2137 q = t4;
2138 pattern_stmt = def_stmt;
2141 else
2143 if (pre_shift >= prec || post_shift >= prec)
2144 return NULL;
2146 /* t1 = oprnd0 >> pre_shift;
2147 t2 = t1 h* ml;
2148 q = t2 >> post_shift; */
2149 if (pre_shift)
2151 t1 = vect_recog_temp_ssa_var (itype, NULL);
2152 def_stmt
2153 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2154 build_int_cst (NULL,
2155 pre_shift));
2156 append_pattern_def_seq (stmt_vinfo, def_stmt);
2158 else
2159 t1 = oprnd0;
2161 t2 = vect_recog_temp_ssa_var (itype, NULL);
2162 def_stmt
2163 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2164 build_int_cst (itype, ml));
2166 if (post_shift)
2168 append_pattern_def_seq (stmt_vinfo, def_stmt);
2170 q = vect_recog_temp_ssa_var (itype, NULL);
2171 def_stmt
2172 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2173 build_int_cst (itype,
2174 post_shift));
2176 else
2177 q = t2;
2179 pattern_stmt = def_stmt;
2182 else
2184 unsigned HOST_WIDE_INT ml;
2185 int post_shift;
2186 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
2187 unsigned HOST_WIDE_INT abs_d;
2188 bool add = false;
2189 tree t1, t2, t3, t4;
2191 /* Give up for -1. */
2192 if (d == -1)
2193 return NULL;
2195 /* Since d might be INT_MIN, we have to cast to
2196 unsigned HOST_WIDE_INT before negating to avoid
2197 undefined signed overflow. */
2198 abs_d = (d >= 0
2199 ? (unsigned HOST_WIDE_INT) d
2200 : - (unsigned HOST_WIDE_INT) d);
2202 /* n rem d = n rem -d */
2203 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2205 d = abs_d;
2206 oprnd1 = build_int_cst (itype, abs_d);
2208 else if (HOST_BITS_PER_WIDE_INT >= prec
2209 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2210 /* This case is not handled correctly below. */
2211 return NULL;
2213 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2214 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2216 add = true;
2217 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2219 if (post_shift >= prec)
2220 return NULL;
2222 /* t1 = oprnd1 h* ml; */
2223 t1 = vect_recog_temp_ssa_var (itype, NULL);
2224 def_stmt
2225 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2226 build_int_cst (itype, ml));
2227 append_pattern_def_seq (stmt_vinfo, def_stmt);
2229 if (add)
2231 /* t2 = t1 + oprnd0; */
2232 t2 = vect_recog_temp_ssa_var (itype, NULL);
2233 def_stmt
2234 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2235 append_pattern_def_seq (stmt_vinfo, def_stmt);
2237 else
2238 t2 = t1;
2240 if (post_shift)
2242 /* t3 = t2 >> post_shift; */
2243 t3 = vect_recog_temp_ssa_var (itype, NULL);
2244 def_stmt
2245 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2246 build_int_cst (itype, post_shift));
2247 append_pattern_def_seq (stmt_vinfo, def_stmt);
2249 else
2250 t3 = t2;
2252 /* t4 = oprnd0 >> (prec - 1); */
2253 t4 = vect_recog_temp_ssa_var (itype, NULL);
2254 def_stmt
2255 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2256 build_int_cst (itype, prec - 1));
2257 append_pattern_def_seq (stmt_vinfo, def_stmt);
2259 /* q = t3 - t4; or q = t4 - t3; */
2260 q = vect_recog_temp_ssa_var (itype, NULL);
2261 pattern_stmt
2262 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2263 d < 0 ? t3 : t4);
2266 if (rhs_code == TRUNC_MOD_EXPR)
2268 tree r, t1;
2270 /* We divided. Now finish by:
2271 t1 = q * oprnd1;
2272 r = oprnd0 - t1; */
2273 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2275 t1 = vect_recog_temp_ssa_var (itype, NULL);
2276 def_stmt
2277 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2278 append_pattern_def_seq (stmt_vinfo, def_stmt);
2280 r = vect_recog_temp_ssa_var (itype, NULL);
2281 pattern_stmt
2282 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2285 /* Pattern detected. */
2286 if (dump_enabled_p ())
2288 dump_printf_loc (MSG_NOTE, vect_location,
2289 "vect_recog_divmod_pattern: detected: ");
2290 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2293 stmts->safe_push (last_stmt);
2295 *type_in = vectype;
2296 *type_out = vectype;
2297 return pattern_stmt;
2300 /* Function vect_recog_mixed_size_cond_pattern
2302 Try to find the following pattern:
2304 type x_t, y_t;
2305 TYPE a_T, b_T, c_T;
2306 loop:
2307 S1 a_T = x_t CMP y_t ? b_T : c_T;
2309 where type 'TYPE' is an integral type which has different size
2310 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2311 than 'type', the constants need to fit into an integer type
2312 with the same width as 'type') or results of conversion from 'type'.
2314 Input:
2316 * LAST_STMT: A stmt from which the pattern search begins.
2318 Output:
2320 * TYPE_IN: The type of the input arguments to the pattern.
2322 * TYPE_OUT: The type of the output of this pattern.
2324 * Return value: A new stmt that will be used to replace the pattern.
2325 Additionally a def_stmt is added.
2327 a_it = x_t CMP y_t ? b_it : c_it;
2328 a_T = (TYPE) a_it; */
2330 static gimple
2331 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2332 tree *type_out)
2334 gimple last_stmt = (*stmts)[0];
2335 tree cond_expr, then_clause, else_clause;
2336 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2337 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2338 enum machine_mode cmpmode;
2339 gimple pattern_stmt, def_stmt;
2340 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2341 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2342 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2343 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2344 bool promotion;
2345 tree comp_scalar_type;
2347 if (!is_gimple_assign (last_stmt)
2348 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2349 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2350 return NULL;
2352 cond_expr = gimple_assign_rhs1 (last_stmt);
2353 then_clause = gimple_assign_rhs2 (last_stmt);
2354 else_clause = gimple_assign_rhs3 (last_stmt);
2356 if (!COMPARISON_CLASS_P (cond_expr))
2357 return NULL;
2359 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2360 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2361 if (comp_vectype == NULL_TREE)
2362 return NULL;
2364 type = gimple_expr_type (last_stmt);
2365 if (types_compatible_p (type, comp_scalar_type)
2366 || ((TREE_CODE (then_clause) != INTEGER_CST
2367 || TREE_CODE (else_clause) != INTEGER_CST)
2368 && !INTEGRAL_TYPE_P (comp_scalar_type))
2369 || !INTEGRAL_TYPE_P (type))
2370 return NULL;
2372 if ((TREE_CODE (then_clause) != INTEGER_CST
2373 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2374 &def_stmt0, &promotion))
2375 || (TREE_CODE (else_clause) != INTEGER_CST
2376 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2377 &def_stmt1, &promotion)))
2378 return NULL;
2380 if (orig_type0 && orig_type1
2381 && !types_compatible_p (orig_type0, orig_type1))
2382 return NULL;
2384 if (orig_type0)
2386 if (!types_compatible_p (orig_type0, comp_scalar_type))
2387 return NULL;
2388 then_clause = gimple_assign_rhs1 (def_stmt0);
2389 itype = orig_type0;
2392 if (orig_type1)
2394 if (!types_compatible_p (orig_type1, comp_scalar_type))
2395 return NULL;
2396 else_clause = gimple_assign_rhs1 (def_stmt1);
2397 itype = orig_type1;
2400 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2402 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2403 return NULL;
2405 vectype = get_vectype_for_scalar_type (type);
2406 if (vectype == NULL_TREE)
2407 return NULL;
2409 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2410 return NULL;
2412 if (itype == NULL_TREE)
2413 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2414 TYPE_UNSIGNED (type));
2416 if (itype == NULL_TREE
2417 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2418 return NULL;
2420 vecitype = get_vectype_for_scalar_type (itype);
2421 if (vecitype == NULL_TREE)
2422 return NULL;
2424 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2425 return NULL;
2427 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2429 if ((TREE_CODE (then_clause) == INTEGER_CST
2430 && !int_fits_type_p (then_clause, itype))
2431 || (TREE_CODE (else_clause) == INTEGER_CST
2432 && !int_fits_type_p (else_clause, itype)))
2433 return NULL;
2436 def_stmt
2437 = gimple_build_assign_with_ops (COND_EXPR,
2438 vect_recog_temp_ssa_var (itype, NULL),
2439 unshare_expr (cond_expr),
2440 fold_convert (itype, then_clause),
2441 fold_convert (itype, else_clause));
2442 pattern_stmt
2443 = gimple_build_assign_with_ops (NOP_EXPR,
2444 vect_recog_temp_ssa_var (type, NULL),
2445 gimple_assign_lhs (def_stmt), NULL_TREE);
2447 new_pattern_def_seq (stmt_vinfo, def_stmt);
2448 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2449 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2450 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2451 *type_in = vecitype;
2452 *type_out = vectype;
2454 if (dump_enabled_p ())
2455 dump_printf_loc (MSG_NOTE, vect_location,
2456 "vect_recog_mixed_size_cond_pattern: detected: ");
2458 return pattern_stmt;
2462 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2463 true if bool VAR can be optimized that way. */
2465 static bool
2466 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2468 gimple def_stmt;
2469 enum vect_def_type dt;
2470 tree def, rhs1;
2471 enum tree_code rhs_code;
2473 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2474 &dt))
2475 return false;
2477 if (dt != vect_internal_def)
2478 return false;
2480 if (!is_gimple_assign (def_stmt))
2481 return false;
2483 if (!has_single_use (def))
2484 return false;
2486 rhs1 = gimple_assign_rhs1 (def_stmt);
2487 rhs_code = gimple_assign_rhs_code (def_stmt);
2488 switch (rhs_code)
2490 case SSA_NAME:
2491 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2493 CASE_CONVERT:
2494 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2495 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2496 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2497 return false;
2498 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2500 case BIT_NOT_EXPR:
2501 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2503 case BIT_AND_EXPR:
2504 case BIT_IOR_EXPR:
2505 case BIT_XOR_EXPR:
2506 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2507 return false;
2508 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2509 bb_vinfo);
2511 default:
2512 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2514 tree vecitype, comp_vectype;
2516 /* If the comparison can throw, then is_gimple_condexpr will be
2517 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2518 if (stmt_could_throw_p (def_stmt))
2519 return false;
2521 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2522 if (comp_vectype == NULL_TREE)
2523 return false;
2525 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2527 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2528 tree itype
2529 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2530 vecitype = get_vectype_for_scalar_type (itype);
2531 if (vecitype == NULL_TREE)
2532 return false;
2534 else
2535 vecitype = comp_vectype;
2536 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2538 return false;
2543 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2544 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2545 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2547 static tree
2548 adjust_bool_pattern_cast (tree type, tree var)
2550 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2551 gimple cast_stmt, pattern_stmt;
2553 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2554 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2555 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2556 cast_stmt
2557 = gimple_build_assign_with_ops (NOP_EXPR,
2558 vect_recog_temp_ssa_var (type, NULL),
2559 gimple_assign_lhs (pattern_stmt),
2560 NULL_TREE);
2561 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2562 return gimple_assign_lhs (cast_stmt);
2566 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2567 recursively. VAR is an SSA_NAME that should be transformed from bool
2568 to a wider integer type, OUT_TYPE is the desired final integer type of
2569 the whole pattern, TRUEVAL should be NULL unless optimizing
2570 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2571 in the then_clause, STMTS is where statements with added pattern stmts
2572 should be pushed to. */
2574 static tree
2575 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2576 vec<gimple> *stmts)
2578 gimple stmt = SSA_NAME_DEF_STMT (var);
2579 enum tree_code rhs_code, def_rhs_code;
2580 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2581 location_t loc;
2582 gimple pattern_stmt, def_stmt;
2584 rhs1 = gimple_assign_rhs1 (stmt);
2585 rhs2 = gimple_assign_rhs2 (stmt);
2586 rhs_code = gimple_assign_rhs_code (stmt);
2587 loc = gimple_location (stmt);
2588 switch (rhs_code)
2590 case SSA_NAME:
2591 CASE_CONVERT:
2592 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2593 itype = TREE_TYPE (irhs1);
2594 pattern_stmt
2595 = gimple_build_assign_with_ops (SSA_NAME,
2596 vect_recog_temp_ssa_var (itype, NULL),
2597 irhs1, NULL_TREE);
2598 break;
2600 case BIT_NOT_EXPR:
2601 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2602 itype = TREE_TYPE (irhs1);
2603 pattern_stmt
2604 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2605 vect_recog_temp_ssa_var (itype, NULL),
2606 irhs1, build_int_cst (itype, 1));
2607 break;
2609 case BIT_AND_EXPR:
2610 /* Try to optimize x = y & (a < b ? 1 : 0); into
2611 x = (a < b ? y : 0);
2613 E.g. for:
2614 bool a_b, b_b, c_b;
2615 TYPE d_T;
2617 S1 a_b = x1 CMP1 y1;
2618 S2 b_b = x2 CMP2 y2;
2619 S3 c_b = a_b & b_b;
2620 S4 d_T = (TYPE) c_b;
2622 we would normally emit:
2624 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2625 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2626 S3' c_T = a_T & b_T;
2627 S4' d_T = c_T;
2629 but we can save one stmt by using the
2630 result of one of the COND_EXPRs in the other COND_EXPR and leave
2631 BIT_AND_EXPR stmt out:
2633 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2634 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2635 S4' f_T = c_T;
2637 At least when VEC_COND_EXPR is implemented using masks
2638 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2639 computes the comparison masks and ands it, in one case with
2640 all ones vector, in the other case with a vector register.
2641 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2642 often more expensive. */
2643 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2644 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2645 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2647 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2648 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2649 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2650 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2652 gimple tstmt;
2653 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2654 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2655 tstmt = stmts->pop ();
2656 gcc_assert (tstmt == def_stmt);
2657 stmts->quick_push (stmt);
2658 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2659 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2660 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2661 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2662 return irhs2;
2664 else
2665 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2666 goto and_ior_xor;
2668 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2669 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2670 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2672 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2673 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2674 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2675 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2677 gimple tstmt;
2678 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2679 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2680 tstmt = stmts->pop ();
2681 gcc_assert (tstmt == def_stmt);
2682 stmts->quick_push (stmt);
2683 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2684 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2685 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2686 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2687 return irhs1;
2689 else
2690 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2691 goto and_ior_xor;
2693 /* FALLTHRU */
2694 case BIT_IOR_EXPR:
2695 case BIT_XOR_EXPR:
2696 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2697 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2698 and_ior_xor:
2699 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2700 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2702 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2703 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2704 int out_prec = TYPE_PRECISION (out_type);
2705 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2706 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2707 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2708 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2709 else
2711 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2712 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2715 itype = TREE_TYPE (irhs1);
2716 pattern_stmt
2717 = gimple_build_assign_with_ops (rhs_code,
2718 vect_recog_temp_ssa_var (itype, NULL),
2719 irhs1, irhs2);
2720 break;
2722 default:
2723 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2724 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2725 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2726 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2727 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2729 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2730 itype
2731 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2733 else
2734 itype = TREE_TYPE (rhs1);
2735 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2736 if (trueval == NULL_TREE)
2737 trueval = build_int_cst (itype, 1);
2738 else
2739 gcc_checking_assert (useless_type_conversion_p (itype,
2740 TREE_TYPE (trueval)));
2741 pattern_stmt
2742 = gimple_build_assign_with_ops (COND_EXPR,
2743 vect_recog_temp_ssa_var (itype, NULL),
2744 cond_expr, trueval,
2745 build_int_cst (itype, 0));
2746 break;
2749 stmts->safe_push (stmt);
2750 gimple_set_location (pattern_stmt, loc);
2751 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2752 return gimple_assign_lhs (pattern_stmt);
2756 /* Function vect_recog_bool_pattern
2758 Try to find pattern like following:
2760 bool a_b, b_b, c_b, d_b, e_b;
2761 TYPE f_T;
2762 loop:
2763 S1 a_b = x1 CMP1 y1;
2764 S2 b_b = x2 CMP2 y2;
2765 S3 c_b = a_b & b_b;
2766 S4 d_b = x3 CMP3 y3;
2767 S5 e_b = c_b | d_b;
2768 S6 f_T = (TYPE) e_b;
2770 where type 'TYPE' is an integral type.
2772 Input:
2774 * LAST_STMT: A stmt at the end from which the pattern
2775 search begins, i.e. cast of a bool to
2776 an integer type.
2778 Output:
2780 * TYPE_IN: The type of the input arguments to the pattern.
2782 * TYPE_OUT: The type of the output of this pattern.
2784 * Return value: A new stmt that will be used to replace the pattern.
2786 Assuming size of TYPE is the same as size of all comparisons
2787 (otherwise some casts would be added where needed), the above
2788 sequence we create related pattern stmts:
2789 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2790 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2791 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2792 S5' e_T = c_T | d_T;
2793 S6' f_T = e_T;
2795 Instead of the above S3' we could emit:
2796 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2797 S3' c_T = a_T | b_T;
2798 but the above is more efficient. */
2800 static gimple
2801 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2802 tree *type_out)
2804 gimple last_stmt = stmts->pop ();
2805 enum tree_code rhs_code;
2806 tree var, lhs, rhs, vectype;
2807 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2808 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2809 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2810 gimple pattern_stmt;
2812 if (!is_gimple_assign (last_stmt))
2813 return NULL;
2815 var = gimple_assign_rhs1 (last_stmt);
2816 lhs = gimple_assign_lhs (last_stmt);
2818 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2819 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2820 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2821 return NULL;
2823 rhs_code = gimple_assign_rhs_code (last_stmt);
2824 if (CONVERT_EXPR_CODE_P (rhs_code))
2826 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2827 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2828 return NULL;
2829 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2830 if (vectype == NULL_TREE)
2831 return NULL;
2833 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2834 return NULL;
2836 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2837 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2838 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2839 pattern_stmt
2840 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2841 else
2842 pattern_stmt
2843 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2844 *type_out = vectype;
2845 *type_in = vectype;
2846 stmts->safe_push (last_stmt);
2847 if (dump_enabled_p ())
2848 dump_printf_loc (MSG_NOTE, vect_location,
2849 "vect_recog_bool_pattern: detected: ");
2851 return pattern_stmt;
2853 else if (rhs_code == SSA_NAME
2854 && STMT_VINFO_DATA_REF (stmt_vinfo))
2856 stmt_vec_info pattern_stmt_info;
2857 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2858 gcc_assert (vectype != NULL_TREE);
2859 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2860 return NULL;
2861 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2862 return NULL;
2864 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2865 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2866 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2868 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2869 gimple cast_stmt
2870 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2871 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2872 rhs = rhs2;
2874 pattern_stmt
2875 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2876 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2877 bb_vinfo);
2878 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2879 STMT_VINFO_DATA_REF (pattern_stmt_info)
2880 = STMT_VINFO_DATA_REF (stmt_vinfo);
2881 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2882 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2883 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2884 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2885 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2886 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2887 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2888 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2889 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2890 *type_out = vectype;
2891 *type_in = vectype;
2892 stmts->safe_push (last_stmt);
2893 if (dump_enabled_p ())
2894 dump_printf_loc (MSG_NOTE, vect_location,
2895 "vect_recog_bool_pattern: detected: ");
2896 return pattern_stmt;
2898 else
2899 return NULL;
2903 /* Mark statements that are involved in a pattern. */
2905 static inline void
2906 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2907 tree pattern_vectype)
2909 stmt_vec_info pattern_stmt_info, def_stmt_info;
2910 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2911 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2912 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2913 gimple def_stmt;
2915 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2916 if (pattern_stmt_info == NULL)
2918 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2919 bb_vinfo);
2920 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2922 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2924 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2925 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2926 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2927 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2928 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2929 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2930 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2931 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2932 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2934 gimple_stmt_iterator si;
2935 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2936 !gsi_end_p (si); gsi_next (&si))
2938 def_stmt = gsi_stmt (si);
2939 def_stmt_info = vinfo_for_stmt (def_stmt);
2940 if (def_stmt_info == NULL)
2942 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2943 bb_vinfo);
2944 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2946 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2947 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2948 STMT_VINFO_DEF_TYPE (def_stmt_info)
2949 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2950 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2951 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2956 /* Function vect_pattern_recog_1
2958 Input:
2959 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2960 computation pattern.
2961 STMT: A stmt from which the pattern search should start.
2963 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2964 expression that computes the same functionality and can be used to
2965 replace the sequence of stmts that are involved in the pattern.
2967 Output:
2968 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2969 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2970 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2971 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2972 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2973 to the available target pattern.
2975 This function also does some bookkeeping, as explained in the documentation
2976 for vect_recog_pattern. */
2978 static void
2979 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2980 gimple_stmt_iterator si,
2981 vec<gimple> *stmts_to_replace)
2983 gimple stmt = gsi_stmt (si), pattern_stmt;
2984 stmt_vec_info stmt_info;
2985 loop_vec_info loop_vinfo;
2986 tree pattern_vectype;
2987 tree type_in, type_out;
2988 enum tree_code code;
2989 int i;
2990 gimple next;
2992 stmts_to_replace->truncate (0);
2993 stmts_to_replace->quick_push (stmt);
2994 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2995 if (!pattern_stmt)
2996 return;
2998 stmt = stmts_to_replace->last ();
2999 stmt_info = vinfo_for_stmt (stmt);
3000 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3002 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3004 /* No need to check target support (already checked by the pattern
3005 recognition function). */
3006 pattern_vectype = type_out ? type_out : type_in;
3008 else
3010 enum machine_mode vec_mode;
3011 enum insn_code icode;
3012 optab optab;
3014 /* Check target support */
3015 type_in = get_vectype_for_scalar_type (type_in);
3016 if (!type_in)
3017 return;
3018 if (type_out)
3019 type_out = get_vectype_for_scalar_type (type_out);
3020 else
3021 type_out = type_in;
3022 if (!type_out)
3023 return;
3024 pattern_vectype = type_out;
3026 if (is_gimple_assign (pattern_stmt))
3027 code = gimple_assign_rhs_code (pattern_stmt);
3028 else
3030 gcc_assert (is_gimple_call (pattern_stmt));
3031 code = CALL_EXPR;
3034 optab = optab_for_tree_code (code, type_in, optab_default);
3035 vec_mode = TYPE_MODE (type_in);
3036 if (!optab
3037 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3038 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3039 return;
3042 /* Found a vectorizable pattern. */
3043 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_NOTE, vect_location,
3046 "pattern recognized: ");
3047 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3050 /* Mark the stmts that are involved in the pattern. */
3051 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3053 /* Patterns cannot be vectorized using SLP, because they change the order of
3054 computation. */
3055 if (loop_vinfo)
3056 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3057 if (next == stmt)
3058 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3060 /* It is possible that additional pattern stmts are created and inserted in
3061 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3062 relevant statements. */
3063 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3064 && (unsigned) i < (stmts_to_replace->length () - 1);
3065 i++)
3067 stmt_info = vinfo_for_stmt (stmt);
3068 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3069 if (dump_enabled_p ())
3071 dump_printf_loc (MSG_NOTE, vect_location,
3072 "additional pattern stmt: ");
3073 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3076 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3081 /* Function vect_pattern_recog
3083 Input:
3084 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3085 computation idioms.
3087 Output - for each computation idiom that is detected we create a new stmt
3088 that provides the same functionality and that can be vectorized. We
3089 also record some information in the struct_stmt_info of the relevant
3090 stmts, as explained below:
3092 At the entry to this function we have the following stmts, with the
3093 following initial value in the STMT_VINFO fields:
3095 stmt in_pattern_p related_stmt vec_stmt
3096 S1: a_i = .... - - -
3097 S2: a_2 = ..use(a_i).. - - -
3098 S3: a_1 = ..use(a_2).. - - -
3099 S4: a_0 = ..use(a_1).. - - -
3100 S5: ... = ..use(a_0).. - - -
3102 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3103 represented by a single stmt. We then:
3104 - create a new stmt S6 equivalent to the pattern (the stmt is not
3105 inserted into the code)
3106 - fill in the STMT_VINFO fields as follows:
3108 in_pattern_p related_stmt vec_stmt
3109 S1: a_i = .... - - -
3110 S2: a_2 = ..use(a_i).. - - -
3111 S3: a_1 = ..use(a_2).. - - -
3112 S4: a_0 = ..use(a_1).. true S6 -
3113 '---> S6: a_new = .... - S4 -
3114 S5: ... = ..use(a_0).. - - -
3116 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3117 to each other through the RELATED_STMT field).
3119 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3120 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3121 remain irrelevant unless used by stmts other than S4.
3123 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3124 (because they are marked as irrelevant). It will vectorize S6, and record
3125 a pointer to the new vector stmt VS6 from S6 (as usual).
3126 S4 will be skipped, and S5 will be vectorized as usual:
3128 in_pattern_p related_stmt vec_stmt
3129 S1: a_i = .... - - -
3130 S2: a_2 = ..use(a_i).. - - -
3131 S3: a_1 = ..use(a_2).. - - -
3132 > VS6: va_new = .... - - -
3133 S4: a_0 = ..use(a_1).. true S6 VS6
3134 '---> S6: a_new = .... - S4 VS6
3135 > VS5: ... = ..vuse(va_new).. - - -
3136 S5: ... = ..use(a_0).. - - -
3138 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3139 elsewhere), and we'll end up with:
3141 VS6: va_new = ....
3142 VS5: ... = ..vuse(va_new)..
3144 In case of more than one pattern statements, e.g., widen-mult with
3145 intermediate type:
3147 S1 a_t = ;
3148 S2 a_T = (TYPE) a_t;
3149 '--> S3: a_it = (interm_type) a_t;
3150 S4 prod_T = a_T * CONST;
3151 '--> S5: prod_T' = a_it w* CONST;
3153 there may be other users of a_T outside the pattern. In that case S2 will
3154 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3155 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3156 be recorded in S3. */
3158 void
3159 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3161 struct loop *loop;
3162 basic_block *bbs;
3163 unsigned int nbbs;
3164 gimple_stmt_iterator si;
3165 unsigned int i, j;
3166 vect_recog_func_ptr vect_recog_func;
3167 vec<gimple> stmts_to_replace;
3168 stmts_to_replace.create (1);
3169 gimple stmt;
3171 if (dump_enabled_p ())
3172 dump_printf_loc (MSG_NOTE, vect_location,
3173 "=== vect_pattern_recog ===");
3175 if (loop_vinfo)
3177 loop = LOOP_VINFO_LOOP (loop_vinfo);
3178 bbs = LOOP_VINFO_BBS (loop_vinfo);
3179 nbbs = loop->num_nodes;
3181 else
3183 bbs = &BB_VINFO_BB (bb_vinfo);
3184 nbbs = 1;
3187 /* Scan through the loop stmts, applying the pattern recognition
3188 functions starting at each stmt visited: */
3189 for (i = 0; i < nbbs; i++)
3191 basic_block bb = bbs[i];
3192 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3194 if (bb_vinfo && (stmt = gsi_stmt (si))
3195 && vinfo_for_stmt (stmt)
3196 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3197 continue;
3199 /* Scan over all generic vect_recog_xxx_pattern functions. */
3200 for (j = 0; j < NUM_PATTERNS; j++)
3202 vect_recog_func = vect_vect_recog_func_ptrs[j];
3203 vect_pattern_recog_1 (vect_recog_func, si,
3204 &stmts_to_replace);
3209 stmts_to_replace.release ();