/* gcc/tree-vect-patterns.c  */
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_pow_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_sdivmod_pow2_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
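
/* A minimal sketch of the idiom this predicate accepts (assuming 16-bit
   short and 32-bit int):

     short x_t;
     int x_T;
     ...
     x_T = (int) x_t;        <-- DEF_STMT: the NOP_EXPR type promotion
     sum_1 = x_T + sum_0;    <-- USE_STMT passed in by the caller

   Here HALF_TYPE comes out as 'short'.  */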
static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
                bool check_sign)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, NULL, def_stmt, &def,
                           &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (! *def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           NULL, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
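
/* A concrete source-level sketch of the idiom (assuming 16-bit short and
   32-bit int):

     short x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += (int) x[i] * (int) y[i];

   The multiply-and-accumulate becomes DOT_PROD_EXPR <x[i], y[i], sum_0>,
   which targets with a widening multiply-add reduction can vectorize
   directly.  */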
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
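
/* A worked sketch of the intermediate-type case (assuming 8-bit char,
   16-bit short and 32-bit int): for a_T = (int) a_t; res = a_T * 300;
   the constant 300 does not fit char, but TYPE (int) is 4 times bigger
   than HALF_TYPE (char) and 300 fits short, so we rewrite

     a_T = (int) a_t;      -->   a_it = (short) a_t;
     res = a_T * 300;      -->   res' = a_it w* 300;

   and the widening multiplication starts from short rather than char.  */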
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
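
/* A source-level sketch of the basic case (assuming 16-bit short and
   32-bit int):

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = (int) a[i] * (int) b[i];

   S3/S4/S5 above match, and the product is replaced by
   WIDEN_MULT <a[i], b[i]>, so the target's widening-multiply support can
   be used instead of unpacking both operands to int first.  */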
static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  if (op1_ok)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
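
/* A sketch of the two rewrites (exponent values as in the comment above):

     x = pow (y, 2.0);    -->   x = y * y;
     x = pow (y, 0.5);    -->   x = sqrt (y);

   Neither pow nor powi has a general vectorized form, while the
   replacement multiply (and sqrt, where the target provides a vector
   sqrt) can be vectorized.  */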
static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
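
/* A source-level sketch (assuming 16-bit short and 32-bit int):

     short x[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += (int) x[i];

   S2/S3 match and the addition becomes WIDEN_SUM <x[i], sum_0>, usable
   where the target implements the widening-sum operation on vectors.  */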
static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                          TYPE_PRECISION (half_type) * 2,
                          TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and C1
   are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', which can
   either be 'type' or some intermediate type.  For now, we expect S5 to be a
   type demotion operation.  We also check that S3 and S4 have only one use.  */
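
/* A source-level sketch (assuming 8-bit unsigned char and 32-bit int; the
   promotions to int come from C's usual arithmetic conversions):

     unsigned char a[N], b[N];
     for (i = 0; i < N; i++)
       b[i] = ((int) a[i] << 2) | 5;    // result demoted back to char

   Both the shift and the OR fit a narrower type, so the pattern performs
   them in that type instead of widening every vector element to int.  */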
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  struct loop *loop = (gimple_bb (stmt))->loop_father;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         a case we associate the last pattern statement with the last
         statement in the sequence.  Therefore, we only add the original
         statement to the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t;
     unsigned TYPE u_res_T;
     TYPE a_T, res_T;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;
     S4 u_res_T = (unsigned TYPE) res_T;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
         '--> a_it = (interm_type) a_t;
     S3 res_T = a_T << CONST;
         '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
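
/* A source-level sketch of the basic case (assuming 16-bit short and
   32-bit int):

     unsigned short a[N];
     unsigned int res[N];
     for (i = 0; i < N; i++)
       res[i] = (unsigned int) a[i] << 3;

   S2/S3 match and the shift becomes WIDEN_LSHIFT_EXPR <a[i], 3>, mapping
   to widening shift instructions on targets that have them.  */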
static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as over-widening operation (it can't
         be any other pattern, because only over-widening detects shifts).
         LAST_STMT is the final type demotion statement, but its related
         statement is the shift.  We analyze the related statement to catch
         cases:

         orig code:
           type a_t;
           itype res;
           TYPE a_T, res_T;

           S1 a_T = (TYPE) a_t;
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;

           (size of type * 2 <= size of itype
            and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

           S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;  <--- LAST_STMT
               --> res = a_it << CONST;

         after widen_shift:

           S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t; - redundant
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;
               --> res = a_t w<< CONST;

         i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);
              nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
            return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     type a_t;
     TYPE b_T, res_T;

     S1 a_t = ;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   where type 'TYPE' is a type whose size differs from that of 'type',
   and op is <<, >> or rotate.

   Also detect cases:

     type a_t;
     TYPE b_T, c_T, res_T;

     S0 c_T = ;
     S1 a_t = (type) c_T;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   with a shift/rotate which has the same type on both operands, in the
   second case just b_T op c_T, in the first case with added cast
   from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
   S3 stmt.  */
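
/* A source-level sketch (assuming an 8-bit char shift amount applied to a
   32-bit int value):

     int b[N];
     char s[N];
     for (i = 0; i < N; i++)
       b[i] = b[i] << s[i];

   Vector shift instructions take both operands in the same vector type,
   so a cast of the shift amount to the shifted operand's type is added to
   STMT_VINFO_PATTERN_DEF_SEQ and the shift then uses that.  */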
static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, NULL, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a signed division by power of two constant that wouldn't be
   otherwise vectorized:

     type a_t, b_t;

     S1 a_t = b_t / N;

   where type 'type' is a signed integral type and N is a constant positive
   power of two.

   Similarly handle signed modulo by power of two constant:

     S4 a_t = b_t % N;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the division stmt.  S1 is replaced by:
     S3  y_t = b_t < 0 ? N - 1 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> log2 (N);

   S4 is replaced by (where *_T temporaries have unsigned type):
     S9  y_T = b_t < 0 ? -1U : 0U;
     S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
     S7  z_t = (type) z_T;
     S6  w_t = b_t + z_t;
     S5  x_t = w_t & (N - 1);
     S4' a_t = x_t - z_t;

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
   S1 or modulo S4 stmt.  */
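
/* A worked instance of the division rewrite (N == 8, so log2 (N) == 3):

     a_t = b_t / 8;

   becomes

     y_t = b_t < 0 ? 7 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 3;

   which preserves C's truncation toward zero: for b_t == -1, C requires
   -1 / 8 == 0, and indeed (-1 + 7) >> 3 == 0, whereas the plain
   arithmetic shift -1 >> 3 would yield -1.  */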
static gimple
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
                                 tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  optab optab;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_UNSIGNED (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
      || !integer_pow2p (oprnd1)
      || tree_int_cst_sgn (oprnd1) != 1)
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != NULL)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing
          || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
        return NULL;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");

  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
  if (rhs_code == TRUNC_DIV_EXPR)
    {
      tree var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                         fold_build2 (MINUS_EXPR, itype,
                                                      oprnd1,
                                                      build_int_cst (itype,
                                                                     1)),
                                         build_int_cst (itype, 0));
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
                                        gimple_assign_lhs (def_stmt));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (RSHIFT_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        var,
                                        build_int_cst (itype,
                                                       tree_log2 (oprnd1)));
    }
  else
    {
      tree signmask;
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
      if (compare_tree_int (oprnd1, 2) == 0)
        {
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
                                             build_int_cst (itype, 1),
                                             build_int_cst (itype, 0));
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
      else
        {
          tree utype
            = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
          tree vecutype = get_vectype_for_scalar_type (utype);
          tree shift
            = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                    - tree_log2 (oprnd1));
          tree var = vect_recog_temp_ssa_var (utype, NULL);
          stmt_vec_info def_stmt_vinfo;

          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                             build_int_cst (utype, -1),
                                             build_int_cst (utype, 0));
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (utype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
                                            gimple_assign_lhs (def_stmt),
                                            shift);
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
                                            NULL_TREE);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        oprnd0, signmask);
      append_pattern_def_seq (stmt_vinfo, def_stmt);
      def_stmt
        = gimple_build_assign_with_ops (BIT_AND_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        fold_build2 (MINUS_EXPR, itype,
                                                     oprnd1,
                                                     build_int_cst (itype,
                                                                    1)));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (MINUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        signmask);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
     Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */
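
/* A source-level sketch (assuming 16-bit short and 32-bit int):

     short x[N], y[N];
     int a[N];
     for (i = 0; i < N; i++)
       a[i] = x[i] < y[i] ? 17 : 42;

   Both constants fit a 16-bit type, so the COND_EXPR is evaluated in a
   short-sized integer type and only the selected result is widened:

     a_it = x[i] < y[i] ? 17 : 42;   // done in the comparison's width
     a[i] = (int) a_it;  */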
static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)
    return NULL;

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                          TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if (!int_fits_type_p (then_clause, itype)
          || !int_fits_type_p (else_clause, itype))
        return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
                                     vect_recog_temp_ssa_var (itype, NULL),
                                     unshare_expr (cond_expr),
                                     fold_convert (itype, then_clause),
                                     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
}
1920 /* Helper function of vect_recog_bool_pattern. Called recursively; return
1921 true if bool VAR can be optimized as described for that pattern. */
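/* For instance (illustrative only), given

     a_b = x1 < y1;
     b_b = x2 != y2;
     c_b = a_b ^ b_b;

   check_bool_pattern (c_b) recurses through the BIT_XOR_EXPR into both
   comparison defs and accepts the chain only if each comparison can be
   expressed as a VEC_COND_EXPR the target supports.  */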
1923 static bool
1924 check_bool_pattern (tree var, loop_vec_info loop_vinfo)
1926 gimple def_stmt;
1927 enum vect_def_type dt;
1928 tree def, rhs1;
1929 enum tree_code rhs_code;
1931 if (!vect_is_simple_use (var, NULL, loop_vinfo, NULL, &def_stmt, &def, &dt))
1932 return false;
1934 if (dt != vect_internal_def)
1935 return false;
1937 if (!is_gimple_assign (def_stmt))
1938 return false;
1940 if (!has_single_use (def))
1941 return false;
1943 rhs1 = gimple_assign_rhs1 (def_stmt);
1944 rhs_code = gimple_assign_rhs_code (def_stmt);
1945 switch (rhs_code)
1947 case SSA_NAME:
1948 return check_bool_pattern (rhs1, loop_vinfo);
1950 CASE_CONVERT:
1951 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1952 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1953 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1954 return false;
1955 return check_bool_pattern (rhs1, loop_vinfo);
1957 case BIT_NOT_EXPR:
1958 return check_bool_pattern (rhs1, loop_vinfo);
1960 case BIT_AND_EXPR:
1961 case BIT_IOR_EXPR:
1962 case BIT_XOR_EXPR:
1963 if (!check_bool_pattern (rhs1, loop_vinfo))
1964 return false;
1965 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
1967 default:
1968 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
1969 {
1970 tree vecitype, comp_vectype;
1972 /* If the comparison can throw, then is_gimple_condexpr will be
1973 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
1974 if (stmt_could_throw_p (def_stmt))
1975 return false;
1977 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
1978 if (comp_vectype == NULL_TREE)
1979 return false;
1981 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
1982 {
1983 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1984 tree itype
1985 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
1986 vecitype = get_vectype_for_scalar_type (itype);
1987 if (vecitype == NULL_TREE)
1988 return false;
1989 }
1990 else
1991 vecitype = comp_vectype;
1992 return expand_vec_cond_expr_p (vecitype, comp_vectype);
1993 }
1994 return false;
1999 /* Helper function of adjust_bool_pattern. Cast the result of VAR's
2000 def stmt to TYPE, by moving that stmt's COND_EXPR from RELATED_STMT
2001 to PATTERN_DEF_SEQ and recording the cast as the new RELATED_STMT. */
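/* A sketch of the bookkeeping (illustrative): if VAR's def stmt already
   carries the pattern stmt D.1 = x CMP y ? 1 : 0 as its RELATED_STMT,
   that stmt is moved into the PATTERN_DEF_SEQ, a cast D.2 = (TYPE) D.1
   becomes the new RELATED_STMT, and D.2 is returned.  */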
2003 static tree
2004 adjust_bool_pattern_cast (tree type, tree var)
2006 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2007 gimple cast_stmt, pattern_stmt;
2009 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2010 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2011 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2012 cast_stmt
2013 = gimple_build_assign_with_ops (NOP_EXPR,
2014 vect_recog_temp_ssa_var (type, NULL),
2015 gimple_assign_lhs (pattern_stmt),
2016 NULL_TREE);
2017 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2018 return gimple_assign_lhs (cast_stmt);
2022 /* Helper function of vect_recog_bool_pattern. Do the actual transformations
2023 recursively. VAR is an SSA_NAME that should be transformed from bool
2024 to a wider integer type. OUT_TYPE is the desired final integer type of
2025 the whole pattern. TRUEVAL should be NULL_TREE unless a BIT_AND_EXPR is
2026 being optimized into a COND_EXPR that uses an integer from one of its
2027 operands as the then_clause. STMTS is the vector onto which stmts that
2028 acquire pattern stmts are pushed. */
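/* For example (an illustrative sketch), for source code like

     bool b = (x < y) | (p != q);
     int r = (int) b;

   the recursion produces roughly

     int t1 = x < y ? 1 : 0;
     int t2 = p != q ? 1 : 0;
     int r = t1 | t2;

   with casts inserted via adjust_bool_pattern_cast whenever the two
   operands would otherwise have different precisions.  */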
2030 static tree
2031 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2032 VEC (gimple, heap) **stmts)
2034 gimple stmt = SSA_NAME_DEF_STMT (var);
2035 enum tree_code rhs_code, def_rhs_code;
2036 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2037 location_t loc;
2038 gimple pattern_stmt, def_stmt;
2040 rhs1 = gimple_assign_rhs1 (stmt);
2041 rhs2 = gimple_assign_rhs2 (stmt);
2042 rhs_code = gimple_assign_rhs_code (stmt);
2043 loc = gimple_location (stmt);
2044 switch (rhs_code)
2046 case SSA_NAME:
2047 CASE_CONVERT:
2048 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2049 itype = TREE_TYPE (irhs1);
2050 pattern_stmt
2051 = gimple_build_assign_with_ops (SSA_NAME,
2052 vect_recog_temp_ssa_var (itype, NULL),
2053 irhs1, NULL_TREE);
2054 break;
2056 case BIT_NOT_EXPR:
2057 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2058 itype = TREE_TYPE (irhs1);
2059 pattern_stmt
2060 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2061 vect_recog_temp_ssa_var (itype, NULL),
2062 irhs1, build_int_cst (itype, 1));
2063 break;
2065 case BIT_AND_EXPR:
2066 /* Try to optimize x = y & (a < b ? 1 : 0); into
2067 x = (a < b ? y : 0);
2069 E.g. for:
2070 bool a_b, b_b, c_b;
2071 TYPE d_T;
2073 S1 a_b = x1 CMP1 y1;
2074 S2 b_b = x2 CMP2 y2;
2075 S3 c_b = a_b & b_b;
2076 S4 d_T = (TYPE) c_b;
2078 we would normally emit:
2080 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2081 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2082 S3' c_T = a_T & b_T;
2083 S4' d_T = c_T;
2085 but we can save one stmt by using the
2086 result of one of the COND_EXPRs in the other COND_EXPR and leaving
2087 the BIT_AND_EXPR stmt out:
2089 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2090 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2091 S4' d_T = c_T;
2093 At least when VEC_COND_EXPR is implemented using masks,
2094 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2095 computes the comparison mask and ANDs it, in one case with an
2096 all-ones vector, in the other case with a vector register.
2097 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
2098 often more expensive. */
2099 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2100 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2101 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2102 {
2103 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2104 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2105 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2106 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2107 {
2108 gimple tstmt;
2109 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2110 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2111 tstmt = VEC_pop (gimple, *stmts);
2112 gcc_assert (tstmt == def_stmt);
2113 VEC_quick_push (gimple, *stmts, stmt);
2114 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2115 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2116 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2117 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2118 return irhs2;
2119 }
2120 else
2121 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2122 goto and_ior_xor;
2123 }
2124 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2125 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2126 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2127 {
2128 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2129 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2130 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2131 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2132 {
2133 gimple tstmt;
2134 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2135 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2136 tstmt = VEC_pop (gimple, *stmts);
2137 gcc_assert (tstmt == def_stmt);
2138 VEC_quick_push (gimple, *stmts, stmt);
2139 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2140 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2141 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2142 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2143 return irhs1;
2144 }
2145 else
2146 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2147 goto and_ior_xor;
2148 }
2149 /* FALLTHRU */
2149 /* FALLTHRU */
2150 case BIT_IOR_EXPR:
2151 case BIT_XOR_EXPR:
2152 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2153 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2154 and_ior_xor:
2155 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2156 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2157 {
2158 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2159 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2160 int out_prec = TYPE_PRECISION (out_type);
2161 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2162 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2163 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2164 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2165 else
2166 {
2167 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2168 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2169 }
2170 }
2171 itype = TREE_TYPE (irhs1);
2172 pattern_stmt
2173 = gimple_build_assign_with_ops (rhs_code,
2174 vect_recog_temp_ssa_var (itype, NULL),
2175 irhs1, irhs2);
2176 break;
2178 default:
2179 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2180 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2181 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2183 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2184 itype
2185 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2187 else
2188 itype = TREE_TYPE (rhs1);
2189 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2190 if (trueval == NULL_TREE)
2191 trueval = build_int_cst (itype, 1);
2192 else
2193 gcc_checking_assert (useless_type_conversion_p (itype,
2194 TREE_TYPE (trueval)));
2195 pattern_stmt
2196 = gimple_build_assign_with_ops3 (COND_EXPR,
2197 vect_recog_temp_ssa_var (itype, NULL),
2198 cond_expr, trueval,
2199 build_int_cst (itype, 0));
2200 break;
2203 VEC_safe_push (gimple, heap, *stmts, stmt);
2204 gimple_set_location (pattern_stmt, loc);
2205 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2206 return gimple_assign_lhs (pattern_stmt);
2210 /* Function vect_recog_bool_pattern
2212 Try to find pattern like following:
2214 bool a_b, b_b, c_b, d_b, e_b;
2215 TYPE f_T;
2216 loop:
2217 S1 a_b = x1 CMP1 y1;
2218 S2 b_b = x2 CMP2 y2;
2219 S3 c_b = a_b & b_b;
2220 S4 d_b = x3 CMP3 y3;
2221 S5 e_b = c_b | d_b;
2222 S6 f_T = (TYPE) e_b;
2224 where type 'TYPE' is an integral type.
2226 Input:
2228 * LAST_STMT: A stmt at the end of the pattern, from which
2229 the search begins, i.e. the cast of a bool to
2230 an integer type.
2232 Output:
2234 * TYPE_IN: The type of the input arguments to the pattern.
2236 * TYPE_OUT: The type of the output of this pattern.
2238 * Return value: A new stmt that will be used to replace the pattern.
2240 Assuming the size of TYPE is the same as the size of all comparisons
2241 (otherwise some casts would be added where needed), for the above
2242 sequence we create the following related pattern stmts:
2243 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2244 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2245 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2246 S5' e_T = c_T | d_T;
2247 S6' f_T = e_T;
2249 Instead of the above S3' we could emit:
2250 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2251 S3' c_T = a_T | b_T;
2252 but the above is more efficient. */
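/* At the source level this corresponds to loops such as (illustrative
   example only):

     int out[N];
     for (i = 0; i < N; i++)
       out[i] = (a[i] < b[i]) & (c[i] != d[i]);

   where the comparisons produce bools that are combined and then
   widened to the integral type, as in S1-S6 above.  */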
2254 static gimple
2255 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2256 tree *type_out)
2258 gimple last_stmt = VEC_pop (gimple, *stmts);
2259 enum tree_code rhs_code;
2260 tree var, lhs, rhs, vectype;
2261 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2262 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2263 gimple pattern_stmt;
2265 if (!is_gimple_assign (last_stmt))
2266 return NULL;
2268 var = gimple_assign_rhs1 (last_stmt);
2269 lhs = gimple_assign_lhs (last_stmt);
2271 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2272 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2273 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2274 return NULL;
2276 rhs_code = gimple_assign_rhs_code (last_stmt);
2277 if (CONVERT_EXPR_CODE_P (rhs_code))
2278 {
2279 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2280 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2281 return NULL;
2282 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2283 if (vectype == NULL_TREE)
2284 return NULL;
2286 if (!check_bool_pattern (var, loop_vinfo))
2287 return NULL;
2289 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2290 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2291 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2292 pattern_stmt
2293 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2294 else
2295 pattern_stmt
2296 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2297 *type_out = vectype;
2298 *type_in = vectype;
2299 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2300 return pattern_stmt;
2301 }
2302 else if (rhs_code == SSA_NAME
2303 && STMT_VINFO_DATA_REF (stmt_vinfo))
2304 {
2305 stmt_vec_info pattern_stmt_info;
2306 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2307 gcc_assert (vectype != NULL_TREE);
2308 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2309 return NULL;
2310 if (!check_bool_pattern (var, loop_vinfo))
2311 return NULL;
2313 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2314 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2315 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2316 {
2317 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2318 gimple cast_stmt
2319 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2320 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2321 rhs = rhs2;
2322 }
2323 pattern_stmt
2324 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2325 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2326 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2327 STMT_VINFO_DATA_REF (pattern_stmt_info)
2328 = STMT_VINFO_DATA_REF (stmt_vinfo);
2329 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2330 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2331 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2332 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2333 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2334 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2335 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2336 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2337 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2338 *type_out = vectype;
2339 *type_in = vectype;
2340 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2341 return pattern_stmt;
2342 }
2343 else
2344 return NULL;
2348 /* Mark statements that are involved in a pattern. */
2350 static inline void
2351 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2352 tree pattern_vectype)
2354 stmt_vec_info pattern_stmt_info, def_stmt_info;
2355 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2356 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2357 gimple def_stmt;
2359 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2360 if (pattern_stmt_info == NULL)
2361 {
2362 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2363 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2364 }
2365 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2367 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2368 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2369 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2370 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2371 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2372 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2373 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2374 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2375 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2376 {
2377 gimple_stmt_iterator si;
2378 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2379 !gsi_end_p (si); gsi_next (&si))
2380 {
2381 def_stmt = gsi_stmt (si);
2382 def_stmt_info = vinfo_for_stmt (def_stmt);
2383 if (def_stmt_info == NULL)
2384 {
2385 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
2386 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2387 }
2388 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2389 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2390 STMT_VINFO_DEF_TYPE (def_stmt_info)
2391 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2392 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2393 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2394 }
2395 }
2396 }
2398 /* Function vect_pattern_recog_1
2400 Input:
2401 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2402 computation pattern.
2403 STMT: A stmt from which the pattern search should start.
2405 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
2406 expression that computes the same functionality and can be used to
2407 replace the sequence of stmts that are involved in the pattern.
2409 Output:
2410 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2411 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2412 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2413 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
2414 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2415 the available target pattern.
2417 This function also does some bookkeeping, as explained in the documentation
2418 for vect_pattern_recog. */
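/* For instance (an illustrative sketch): vect_recog_widen_mult_pattern
   may hand back a WIDEN_MULT_EXPR stmt with *TYPE_IN set to the narrower
   scalar type; the code below then derives the vector types and queries
   the target's optab for the new code before accepting the pattern.  */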
2420 static void
2421 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2422 gimple_stmt_iterator si,
2423 VEC (gimple, heap) **stmts_to_replace)
2425 gimple stmt = gsi_stmt (si), pattern_stmt;
2426 stmt_vec_info stmt_info;
2427 loop_vec_info loop_vinfo;
2428 tree pattern_vectype;
2429 tree type_in, type_out;
2430 enum tree_code code;
2431 int i;
2432 gimple next;
2434 VEC_truncate (gimple, *stmts_to_replace, 0);
2435 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2436 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2437 if (!pattern_stmt)
2438 return;
2440 stmt = VEC_last (gimple, *stmts_to_replace);
2441 stmt_info = vinfo_for_stmt (stmt);
2442 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2444 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2446 /* No need to check target support (already checked by the pattern
2447 recognition function). */
2448 pattern_vectype = type_out ? type_out : type_in;
2450 else
2451 {
2452 enum machine_mode vec_mode;
2453 enum insn_code icode;
2454 optab optab;
2456 /* Check target support */
2457 type_in = get_vectype_for_scalar_type (type_in);
2458 if (!type_in)
2459 return;
2460 if (type_out)
2461 type_out = get_vectype_for_scalar_type (type_out);
2462 else
2463 type_out = type_in;
2464 if (!type_out)
2465 return;
2466 pattern_vectype = type_out;
2468 if (is_gimple_assign (pattern_stmt))
2469 code = gimple_assign_rhs_code (pattern_stmt);
2470 else
2471 {
2472 gcc_assert (is_gimple_call (pattern_stmt));
2473 code = CALL_EXPR;
2474 }
2476 optab = optab_for_tree_code (code, type_in, optab_default);
2477 vec_mode = TYPE_MODE (type_in);
2478 if (!optab
2479 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2480 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2481 return;
2482 }
2484 /* Found a vectorizable pattern. */
2485 if (vect_print_dump_info (REPORT_DETAILS))
2486 {
2487 fprintf (vect_dump, "pattern recognized: ");
2488 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2489 }
2491 /* Mark the stmts that are involved in the pattern. */
2492 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2494 /* Patterns cannot be vectorized using SLP, because they change the order of
2495 computation. */
2496 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2497 if (next == stmt)
2498 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2500 /* It is possible that additional pattern stmts are created and inserted in
2501 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2502 relevant statements. */
2503 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2504 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2505 i++)
2506 {
2507 stmt_info = vinfo_for_stmt (stmt);
2508 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2509 if (vect_print_dump_info (REPORT_DETAILS))
2510 {
2511 fprintf (vect_dump, "additional pattern stmt: ");
2512 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2513 }
2515 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2516 }
2517 }
2520 /* Function vect_pattern_recog
2522 Input:
2523 LOOP_VINFO - the loop_vec_info of a loop in which we want to look for
2524 computation idioms.
2526 Output - for each computation idiom that is detected we create a new stmt
2527 that provides the same functionality and that can be vectorized. We
2528 also record some information in the stmt_vec_info of the relevant
2529 stmts, as explained below:
2531 At the entry to this function we have the following stmts, with the
2532 following initial value in the STMT_VINFO fields:
2534 stmt in_pattern_p related_stmt vec_stmt
2535 S1: a_i = .... - - -
2536 S2: a_2 = ..use(a_i).. - - -
2537 S3: a_1 = ..use(a_2).. - - -
2538 S4: a_0 = ..use(a_1).. - - -
2539 S5: ... = ..use(a_0).. - - -
2541 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2542 represented by a single stmt. We then:
2543 - create a new stmt S6 equivalent to the pattern (the stmt is not
2544 inserted into the code)
2545 - fill in the STMT_VINFO fields as follows:
2547 in_pattern_p related_stmt vec_stmt
2548 S1: a_i = .... - - -
2549 S2: a_2 = ..use(a_i).. - - -
2550 S3: a_1 = ..use(a_2).. - - -
2551 S4: a_0 = ..use(a_1).. true S6 -
2552 '---> S6: a_new = .... - S4 -
2553 S5: ... = ..use(a_0).. - - -
2555 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2556 to each other through the RELATED_STMT field).
2558 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2559 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2560 remain irrelevant unless used by stmts other than S4.
2562 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2563 (because they are marked as irrelevant). It will vectorize S6, and record
2564 a pointer to the new vector stmt VS6 from S6 (as usual).
2565 S4 will be skipped, and S5 will be vectorized as usual:
2567 in_pattern_p related_stmt vec_stmt
2568 S1: a_i = .... - - -
2569 S2: a_2 = ..use(a_i).. - - -
2570 S3: a_1 = ..use(a_2).. - - -
2571 > VS6: va_new = .... - - -
2572 S4: a_0 = ..use(a_1).. true S6 VS6
2573 '---> S6: a_new = .... - S4 VS6
2574 > VS5: ... = ..vuse(va_new).. - - -
2575 S5: ... = ..use(a_0).. - - -
2577 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2578 elsewhere), and we'll end up with:
2580 VS6: va_new = ....
2581 VS5: ... = ..vuse(va_new)..
2583 In case of more than one pattern statements, e.g., widen-mult with
2584 intermediate type:
2586 S1 a_t = ;
2587 S2 a_T = (TYPE) a_t;
2588 '--> S3: a_it = (interm_type) a_t;
2589 S4 prod_T = a_T * CONST;
2590 '--> S5: prod_T' = a_it w* CONST;
2592 there may be other users of a_T outside the pattern. In that case S2 will
2593 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2594 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2595 be recorded in S3. */
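/* At the source level, the widen-mult example above corresponds to a loop
   such as (illustrative only):

     short s[N];
     int p[N];
     for (i = 0; i < N; i++)
       p[i] = s[i] * 37;

   where S2 is the implicit promotion of s[i] to int and S4 the multiply;
   the pattern computes the product in the intermediate type instead.  */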
2597 void
2598 vect_pattern_recog (loop_vec_info loop_vinfo)
2600 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2601 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2602 unsigned int nbbs = loop->num_nodes;
2603 gimple_stmt_iterator si;
2604 unsigned int i, j;
2605 vect_recog_func_ptr vect_recog_func;
2606 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2608 if (vect_print_dump_info (REPORT_DETAILS))
2609 fprintf (vect_dump, "=== vect_pattern_recog ===");
2611 /* Scan through the loop stmts, applying the pattern recognition
2612 functions starting at each stmt visited: */
2613 for (i = 0; i < nbbs; i++)
2614 {
2615 basic_block bb = bbs[i];
2616 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2617 {
2618 /* Scan over all generic vect_recog_xxx_pattern functions. */
2619 for (j = 0; j < NUM_PATTERNS; j++)
2620 {
2621 vect_recog_func = vect_vect_recog_func_ptrs[j];
2622 vect_pattern_recog_1 (vect_recog_func, si,
2623 &stmts_to_replace);
2624 }
2625 }
2626 }
2628 VEC_free (gimple, heap, stmts_to_replace);