/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_pow_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};

/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
                bool check_sign)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (! *def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
                           &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
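
/* For illustration only (an added example, not part of the original file):
   a hypothetical C source loop that this pattern targets, with 'type' being
   short and 'TYPE1'/'TYPE2' being int:

       int
       dot_prod (short *x, short *y, int n)
       {
         int i, sum = 0;
         for (i = 0; i < n; i++)
           sum += x[i] * y[i];
         return sum;
       }

   The usual arithmetic conversions promote x[i] and y[i] to int (S3/S4),
   the products are computed in int (S5) and accumulated into sum (S7), so
   the loop body maps to DOT_PROD_EXPR <x[i], y[i], sum>.  */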

static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
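
/* For illustration only (an added example, not part of the original file):
   assuming 8-bit chars and 32-bit ints, in

       char a_t;
       int res_T = (int) a_t * 300;

   the constant 300 does not fit HALF_TYPE (char), but TYPE (int) is four
   times wider, so the promotion can be rewritten through the 16-bit
   intermediate type, into which 300 does fit:

       a_it  = (short) a_t;
       res_T = a_it w* 300;     [widen-mult from short to int]  */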

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
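
/* For illustration only (an added example, not part of the original file):
   a hypothetical source loop matching the first variant above, with 'type'
   being short and 'TYPE' being int:

       void
       widen_mult (short *a, short *b, int *c, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           c[i] = a[i] * b[i];
       }

   a[i] and b[i] are promoted to int before the multiplication (S3/S4), so
   S5 becomes WIDEN_MULT <a[i], b[i]> when the target supports it.  */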

static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  if (op1_ok)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
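
/* For illustration only (an added example, not part of the original file),
   assuming <math.h> is included:

       double square (double y) { return pow (y, 2.0); }
       double root (double y)   { return pow (y, 0.5); }

   The first call is replaced by y * y; the second becomes sqrt (y),
   provided the target can vectorize sqrt.  */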

static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
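
/* For illustration only (an added example, not part of the original file):
   a hypothetical reduction loop with 'type' being unsigned short and 'TYPE'
   being unsigned int:

       unsigned int
       sum_elements (unsigned short *p, int n)
       {
         unsigned int sum = 0;
         int i;
         for (i = 0; i < n; i++)
           sum += p[i];
         return sum;
       }

   Each p[i] is widened to unsigned int (S2) before being added to the
   reduction variable (S3), matching WIDEN_SUM <p[i], sum>.  */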

static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
         the second pattern statement.  */
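
/* For illustration only (an added example, not part of the original file):
   with 8-bit chars and 32-bit ints, the C statement

       unsigned char a, b;
       b = (a << 2) | 3;

   is gimplified roughly as

       a_T    = (int) a;
       res0_T = a_T << 2;
       res1_T = res0_T | 3;
       b      = (unsigned char) res1_T;

   and this function verifies, statement by statement, that the shift and
   the bitwise OR can instead be done in unsigned char (or, failing that,
   in a twice-as-wide intermediate type).  */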

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.  Check if S3 and S4 can be done on a smaller type than 'TYPE';
   it can either be 'type' or some intermediate type.  For now, we expect S5
   to be a type demotion operation.  We also check that S3 and S4 have only
   one use.  */
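
/* For illustration only (an added example, not part of the original file):
   a hypothetical loop producing the sequence above, with 'type' being
   unsigned char and 'TYPE' being int:

       void
       f (unsigned char *a, unsigned char *b, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           b[i] = (a[i] << 1) | 3;
       }

   a[i] is promoted to int (S2), shifted and OR-ed in int (S3/S4), and the
   result is demoted back to unsigned char (S5), so both operations can be
   performed on the narrower type.  */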

static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  struct loop *loop = (gimple_bb (stmt))->loop_father;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type promotion or signedness change.  Check that
         USE_TYPE is not bigger than the original type.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
          || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect unsigned cases:

  unsigned type a_t;
  unsigned TYPE u_res_T;
  TYPE a_T, res_T;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 u_res_T = (unsigned TYPE) res_T;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
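
/* For illustration only (an added example, not part of the original file):
   a hypothetical loop for the first variant above, with 'type' being short
   and 'TYPE' being int:

       void
       f (short *a, int *r, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           r[i] = a[i] << 8;
       }

   a[i] is promoted to int (S2) and then shifted (S3), which maps to
   WIDEN_LSHIFT_EXPR <a[i], 8> on targets that support it.  */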

static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as over-widening operation (it can't
         be any other pattern, because only over-widening detects shifts).
         LAST_STMT is the final type demotion statement, but its related
         statement is shift.  We analyze the related statement to catch cases:

         orig code:
          type a_t;
          itype res;
          TYPE a_T, res_T;

          S1 a_T = (TYPE) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;

          (size of type * 2 <= size of itype
           and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;  <--- LAST_STMT
               --> res = a_it << CONST;

         after widen_shift:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;  - redundant
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;
               --> res = a_t w<< CONST;

      i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);
              nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
            return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

  type a_t;
  TYPE b_T, c_T, res_T;

  S0 c_T = ;
  S1 a_t = (type) c_T;
  S2 b_T = ;
  S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_STMT.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
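
/* For illustration only (an added example, not part of the original file):
   a hypothetical loop where the shift amount is a variable of a narrower
   type than the shifted value (assuming int and long long have different
   machine modes on the target):

       void
       f (long long *b, int *s, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           b[i] = b[i] << s[i];
       }

   A cast of s[i] to long long is created (or an existing one is reused, in
   the second case described above) so that both shift operands end up with
   the same type.  */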

static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
        Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */
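
/* For illustration only (an added example, not part of the original file):
   a hypothetical loop with a char-wide comparison selecting int constants:

       void
       f (signed char *x, int *r, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           r[i] = x[i] < 0 ? -1 : 1;
       }

   The comparison is done on chars while the result is int, so a char-wide
   COND_EXPR computing -1 or 1 is emitted first (the added def_stmt) and its
   result is then widened to int (the returned pattern_stmt).  */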

static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)
    return NULL;

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                          TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if (!int_fits_type_p (then_clause, itype)
          || !int_fits_type_p (else_clause, itype))
        return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
                                     vect_recog_temp_ssa_var (itype, NULL),
                                     unshare_expr (cond_expr),
                                     fold_convert (itype, then_clause),
                                     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
}

/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}

/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_STMT and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (pattern_stmt),
                                    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}

1791 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
1792 recursively. VAR is an SSA_NAME that should be transformed from bool
1793 to a wider integer type, OUT_TYPE is the desired final integer type of
1794 the whole pattern, TRUEVAL should be NULL unless optimizing
1795 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
1796 in the then_clause, STMTS is where statements with added pattern stmts
1797 should be pushed to. */
1799 static tree
1800 adjust_bool_pattern (tree var, tree out_type, tree trueval,
1801 VEC (gimple, heap) **stmts)
1803 gimple stmt = SSA_NAME_DEF_STMT (var);
1804 enum tree_code rhs_code, def_rhs_code;
1805 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
1806 location_t loc;
1807 gimple pattern_stmt, def_stmt;
1809 rhs1 = gimple_assign_rhs1 (stmt);
1810 rhs2 = gimple_assign_rhs2 (stmt);
1811 rhs_code = gimple_assign_rhs_code (stmt);
1812 loc = gimple_location (stmt);
1813 switch (rhs_code)
1815 case SSA_NAME:
1816 CASE_CONVERT:
1817 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1818 itype = TREE_TYPE (irhs1);
1819 pattern_stmt
1820 = gimple_build_assign_with_ops (SSA_NAME,
1821 vect_recog_temp_ssa_var (itype, NULL),
1822 irhs1, NULL_TREE);
1823 break;
1825 case BIT_NOT_EXPR:
1826 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1827 itype = TREE_TYPE (irhs1);
1828 pattern_stmt
1829 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
1830 vect_recog_temp_ssa_var (itype, NULL),
1831 irhs1, build_int_cst (itype, 1));
1832 break;
1834 case BIT_AND_EXPR:
1835 /* Try to optimize x = y & (a < b ? 1 : 0); into
1836 x = (a < b ? y : 0);
1838 E.g. for:
1839 bool a_b, b_b, c_b;
1840 TYPE d_T;
1842 S1 a_b = x1 CMP1 y1;
1843 S2 b_b = x2 CMP2 y2;
1844 S3 c_b = a_b & b_b;
1845 S4 d_T = (TYPE) c_b;
1847 we would normally emit:
1849 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1850 S2' b_T = x2 CMP2 y2 ? 1 : 0;
1851 S3' c_T = a_T & b_T;
1852 S4' d_T = c_T;
1854 but we can save one stmt by using the
1855 result of one of the COND_EXPRs in the other COND_EXPR and leaving
1856 the BIT_AND_EXPR stmt out:
1858 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1859 S3' c_T = x2 CMP2 y2 ? a_T : 0;
1860 S4' d_T = c_T;
1862 At least when VEC_COND_EXPR is implemented using masks,
1863 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
1864 computes the comparison mask and ANDs it, in one case with an
1865 all-ones vector and in the other case with a vector register.
1866 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
1867 often more expensive. */
1868 def_stmt = SSA_NAME_DEF_STMT (rhs2);
1869 def_rhs_code = gimple_assign_rhs_code (def_stmt);
1870 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
1872 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
1873 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1874 if (TYPE_PRECISION (TREE_TYPE (irhs1))
1875 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
1877 gimple tstmt;
1878 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
1879 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
1880 tstmt = VEC_pop (gimple, *stmts);
1881 gcc_assert (tstmt == def_stmt);
1882 VEC_quick_push (gimple, *stmts, stmt);
1883 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
1884 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
1885 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
1886 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
1887 return irhs2;
1889 else
1890 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1891 goto and_ior_xor;
1893 def_stmt = SSA_NAME_DEF_STMT (rhs1);
1894 def_rhs_code = gimple_assign_rhs_code (def_stmt);
1895 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
1897 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
1898 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1899 if (TYPE_PRECISION (TREE_TYPE (irhs2))
1900 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
1902 gimple tstmt;
1903 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
1904 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
1905 tstmt = VEC_pop (gimple, *stmts);
1906 gcc_assert (tstmt == def_stmt);
1907 VEC_quick_push (gimple, *stmts, stmt);
1908 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
1909 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
1910 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
1911 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
1912 return irhs1;
1914 else
1915 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1916 goto and_ior_xor;
1918 /* FALLTHRU */
1919 case BIT_IOR_EXPR:
1920 case BIT_XOR_EXPR:
1921 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1922 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1923 and_ior_xor:
1924 if (TYPE_PRECISION (TREE_TYPE (irhs1))
1925 != TYPE_PRECISION (TREE_TYPE (irhs2)))
1927 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
1928 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
1929 int out_prec = TYPE_PRECISION (out_type);
1930 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
1931 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
1932 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
1933 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
1934 else
1936 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
1937 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
1940 itype = TREE_TYPE (irhs1);
1941 pattern_stmt
1942 = gimple_build_assign_with_ops (rhs_code,
1943 vect_recog_temp_ssa_var (itype, NULL),
1944 irhs1, irhs2);
1945 break;
1947 default:
1948 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
1949 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
1950 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1952 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1953 itype
1954 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
1956 else
1957 itype = TREE_TYPE (rhs1);
1958 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
1959 if (trueval == NULL_TREE)
1960 trueval = build_int_cst (itype, 1);
1961 else
1962 gcc_checking_assert (useless_type_conversion_p (itype,
1963 TREE_TYPE (trueval)));
1964 pattern_stmt
1965 = gimple_build_assign_with_ops3 (COND_EXPR,
1966 vect_recog_temp_ssa_var (itype, NULL),
1967 cond_expr, trueval,
1968 build_int_cst (itype, 0));
1969 break;
1972 VEC_safe_push (gimple, heap, *stmts, stmt);
1973 gimple_set_location (pattern_stmt, loc);
1974 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1975 return gimple_assign_lhs (pattern_stmt);
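/* Worked example of the and_ior_xor precision adjustment above
   (hypothetical numbers): suppose irhs1 has 8-bit precision, irhs2 has
   32-bit precision and OUT_TYPE is 16 bits wide.  Then |16 - 8| = 8 is
   smaller than |16 - 32| = 16, so irhs2 is recomputed in irhs1's 8-bit
   type via adjust_bool_pattern_cast; had the two distances been equal,
   both operands would have been cast to OUT_TYPE instead.  This keeps
   the two operands of the BIT_AND/IOR/XOR in a single type while
   staying as close as possible to the precision of the final result.  */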
1979 /* Function vect_recog_bool_pattern
1981 Try to find a pattern like the following:
1983 bool a_b, b_b, c_b, d_b, e_b;
1984 TYPE f_T;
1985 loop:
1986 S1 a_b = x1 CMP1 y1;
1987 S2 b_b = x2 CMP2 y2;
1988 S3 c_b = a_b & b_b;
1989 S4 d_b = x3 CMP3 y3;
1990 S5 e_b = c_b | d_b;
1991 S6 f_T = (TYPE) e_b;
1993 where type 'TYPE' is an integral type.
1995 Input:
1997 * LAST_STMT: A stmt at the end from which the pattern
1998 search begins, i.e. a cast of a bool to
1999 an integer type.
2001 Output:
2003 * TYPE_IN: The type of the input arguments to the pattern.
2005 * TYPE_OUT: The type of the output of this pattern.
2007 * Return value: A new stmt that will be used to replace the pattern.
2009 Assuming the size of TYPE is the same as the size of all comparisons
2010 (otherwise some casts would be added where needed), for the above
2011 sequence we create related pattern stmts:
2012 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2013 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2014 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2015 S5' e_T = c_T | d_T;
2016 S6' f_T = e_T;
2018 Instead of the above S3' we could emit:
2019 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2020 S3' c_T = a_T & b_T;
2021 but the above is more efficient. (A scalar sketch of this whole
pattern follows the function below.) */
2023 static gimple
2024 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2025 tree *type_out)
2027 gimple last_stmt = VEC_pop (gimple, *stmts);
2028 enum tree_code rhs_code;
2029 tree var, lhs, rhs, vectype;
2030 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2032 gimple pattern_stmt;
2034 if (!is_gimple_assign (last_stmt))
2035 return NULL;
2037 var = gimple_assign_rhs1 (last_stmt);
2038 lhs = gimple_assign_lhs (last_stmt);
2040 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2041 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2042 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2043 return NULL;
2045 rhs_code = gimple_assign_rhs_code (last_stmt);
2046 if (CONVERT_EXPR_CODE_P (rhs_code))
2048 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2049 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2050 return NULL;
2051 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2052 if (vectype == NULL_TREE)
2053 return NULL;
2055 if (!check_bool_pattern (var, loop_vinfo))
2056 return NULL;
2058 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2059 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2060 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2061 pattern_stmt
2062 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2063 else
2064 pattern_stmt
2065 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2066 *type_out = vectype;
2067 *type_in = vectype;
2068 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2069 return pattern_stmt;
2071 else if (rhs_code == SSA_NAME
2072 && STMT_VINFO_DATA_REF (stmt_vinfo))
2074 stmt_vec_info pattern_stmt_info;
2075 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2076 gcc_assert (vectype != NULL_TREE);
2077 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2078 return NULL;
2079 if (!check_bool_pattern (var, loop_vinfo))
2080 return NULL;
2082 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2083 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2084 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2086 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2087 gimple cast_stmt
2088 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2089 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
2090 rhs = rhs2;
2092 pattern_stmt
2093 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2094 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2095 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2096 STMT_VINFO_DATA_REF (pattern_stmt_info)
2097 = STMT_VINFO_DATA_REF (stmt_vinfo);
2098 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2099 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2100 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2101 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2102 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2103 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2104 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2105 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2106 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2107 *type_out = vectype;
2108 *type_in = vectype;
2109 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2110 return pattern_stmt;
2112 else
2113 return NULL;
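/* Illustration only -- a hypothetical scalar loop of the shape
   vect_recog_bool_pattern looks for, matching the S1..S6 sequence in
   the comment before the function (names are made up for this sketch):  */

static void
example_bool_pattern_loop (int *f, const int *x1, const int *y1,
			   const int *x2, const int *y2, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      _Bool a_b = x1[i] < y1[i];	/* S1 */
      _Bool b_b = x2[i] < y2[i];	/* S2 */
      _Bool c_b = a_b & b_b;		/* S3 */
      _Bool d_b = x1[i] != x2[i];	/* S4 */
      _Bool e_b = c_b | d_b;		/* S5 */
      f[i] = (int) e_b;		/* S6: cast of a bool to integral TYPE */
    }
}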
2117 /* Mark statements that are involved in a pattern. */
2119 static inline void
2120 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2121 tree pattern_vectype)
2123 stmt_vec_info pattern_stmt_info, def_stmt_info;
2124 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2125 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2126 gimple def_stmt;
2128 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2129 if (pattern_stmt_info == NULL)
2131 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2132 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2134 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2136 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2137 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2138 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2139 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2140 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2141 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2142 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
2143 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
2144 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
2146 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
2147 def_stmt_info = vinfo_for_stmt (def_stmt);
2148 if (def_stmt_info == NULL)
2150 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
2151 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2153 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2154 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2155 STMT_VINFO_DEF_TYPE (def_stmt_info)
2156 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2157 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2158 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2162 /* Function vect_pattern_recog_1
2164 Input:
2165 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2166 computation pattern.
2167 STMT: A stmt from which the pattern search should start.
2169 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
2170 expression that computes the same functionality and can be used to
2171 replace the sequence of stmts that are involved in the pattern.
2173 Output:
2174 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2175 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2176 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2177 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
2178 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2179 the available target pattern.
2181 This function also does some bookkeeping, as explained in the documentation
2182 for vect_pattern_recog. */
2184 static void
2185 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2186 gimple_stmt_iterator si,
2187 VEC (gimple, heap) **stmts_to_replace)
2189 gimple stmt = gsi_stmt (si), pattern_stmt;
2190 stmt_vec_info stmt_info;
2191 loop_vec_info loop_vinfo;
2192 tree pattern_vectype;
2193 tree type_in, type_out;
2194 enum tree_code code;
2195 int i;
2196 gimple next;
2198 VEC_truncate (gimple, *stmts_to_replace, 0);
2199 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2200 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2201 if (!pattern_stmt)
2202 return;
2204 stmt = VEC_last (gimple, *stmts_to_replace);
2205 stmt_info = vinfo_for_stmt (stmt);
2206 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2208 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2210 /* No need to check target support (already checked by the pattern
2211 recognition function). */
2212 pattern_vectype = type_out ? type_out : type_in;
2214 else
2216 enum machine_mode vec_mode;
2217 enum insn_code icode;
2218 optab optab;
2220 /* Check target support */
2221 type_in = get_vectype_for_scalar_type (type_in);
2222 if (!type_in)
2223 return;
2224 if (type_out)
2225 type_out = get_vectype_for_scalar_type (type_out);
2226 else
2227 type_out = type_in;
2228 if (!type_out)
2229 return;
2230 pattern_vectype = type_out;
2232 if (is_gimple_assign (pattern_stmt))
2233 code = gimple_assign_rhs_code (pattern_stmt);
2234 else
2236 gcc_assert (is_gimple_call (pattern_stmt));
2237 code = CALL_EXPR;
2240 optab = optab_for_tree_code (code, type_in, optab_default);
2241 vec_mode = TYPE_MODE (type_in);
2242 if (!optab
2243 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2244 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2245 return;
2248 /* Found a vectorizable pattern. */
2249 if (vect_print_dump_info (REPORT_DETAILS))
2251 fprintf (vect_dump, "pattern recognized: ");
2252 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2255 /* Mark the stmts that are involved in the pattern. */
2256 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2258 /* Patterns cannot be vectorized using SLP, because they change the order of
2259 computation. */
2260 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2261 if (next == stmt)
2262 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2264 /* It is possible that additional pattern stmts are created and inserted in
2265 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2266 relevant statements. */
2267 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2268 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2269 i++)
2271 stmt_info = vinfo_for_stmt (stmt);
2272 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2273 if (vect_print_dump_info (REPORT_DETAILS))
2275 fprintf (vect_dump, "additional pattern stmt: ");
2276 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2279 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2284 /* Function vect_pattern_recog
2286 Input:
2287 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
2288 computation idioms.
2290 Output - for each computation idiom that is detected we create a new stmt
2291 that provides the same functionality and that can be vectorized. We
2292 also record some information in the stmt_vec_info of the relevant
2293 stmts, as explained below:
2295 At the entry to this function we have the following stmts, with the
2296 following initial value in the STMT_VINFO fields:
2298 stmt in_pattern_p related_stmt vec_stmt
2299 S1: a_i = .... - - -
2300 S2: a_2 = ..use(a_i).. - - -
2301 S3: a_1 = ..use(a_2).. - - -
2302 S4: a_0 = ..use(a_1).. - - -
2303 S5: ... = ..use(a_0).. - - -
2305 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2306 represented by a single stmt. We then:
2307 - create a new stmt S6 equivalent to the pattern (the stmt is not
2308 inserted into the code)
2309 - fill in the STMT_VINFO fields as follows:
2311 in_pattern_p related_stmt vec_stmt
2312 S1: a_i = .... - - -
2313 S2: a_2 = ..use(a_i).. - - -
2314 S3: a_1 = ..use(a_2).. - - -
2315 S4: a_0 = ..use(a_1).. true S6 -
2316 '---> S6: a_new = .... - S4 -
2317 S5: ... = ..use(a_0).. - - -
2319 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2320 to each other through the RELATED_STMT field).
2322 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2323 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2324 remain irrelevant unless used by stmts other than S4.
2326 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2327 (because they are marked as irrelevant). It will vectorize S6, and record
2328 a pointer to the new vector stmt VS6 from S6 (as usual).
2329 S4 will be skipped, and S5 will be vectorized as usual:
2331 in_pattern_p related_stmt vec_stmt
2332 S1: a_i = .... - - -
2333 S2: a_2 = ..use(a_i).. - - -
2334 S3: a_1 = ..use(a_2).. - - -
2335 > VS6: va_new = .... - - -
2336 S4: a_0 = ..use(a_1).. true S6 VS6
2337 '---> S6: a_new = .... - S4 VS6
2338 > VS5: ... = ..vuse(va_new).. - - -
2339 S5: ... = ..use(a_0).. - - -
2341 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2342 elsewhere), and we'll end up with:
2344 VS6: va_new = ....
2345 VS5: ... = ..vuse(va_new)..
2347 In case of more than one pattern statement, e.g., widen-mult with an
2348 intermediate type (a scalar sketch of this case follows this function):
2350 S1 a_t = ;
2351 S2 a_T = (TYPE) a_t;
2352 '--> S3: a_it = (interm_type) a_t;
2353 S4 prod_T = a_T * CONST;
2354 '--> S5: prod_T' = a_it w* CONST;
2356 there may be other users of a_T outside the pattern. In that case S2 will
2357 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2358 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2359 be recorded in S3. */
2361 void
2362 vect_pattern_recog (loop_vec_info loop_vinfo)
2364 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2365 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2366 unsigned int nbbs = loop->num_nodes;
2367 gimple_stmt_iterator si;
2368 unsigned int i, j;
2369 vect_recog_func_ptr vect_recog_func;
2370 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2372 if (vect_print_dump_info (REPORT_DETAILS))
2373 fprintf (vect_dump, "=== vect_pattern_recog ===");
2375 /* Scan through the loop stmts, applying the pattern recognition
2376 functions starting at each stmt visited: */
2377 for (i = 0; i < nbbs; i++)
2379 basic_block bb = bbs[i];
2380 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2382 /* Scan over all generic vect_recog_xxx_pattern functions. */
2383 for (j = 0; j < NUM_PATTERNS; j++)
2385 vect_recog_func = vect_vect_recog_func_ptrs[j];
2386 vect_pattern_recog_1 (vect_recog_func, si,
2387 &stmts_to_replace);
2392 VEC_free (gimple, heap, stmts_to_replace);
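/* Illustration only -- a hypothetical scalar loop for the multi-statement
   widen-mult case sketched in the comment before vect_pattern_recog:
   a_t is widened to TYPE, and the pattern recognizer may instead compute
   the product through a narrower intermediate type with a widening
   multiply (S3/S5 in that comment).  Names are made up for this sketch.  */

static void
example_widen_mult_loop (long long *prod, const short *a, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      short a_t = a[i];			/* S1 */
      long long a_T = (long long) a_t;	/* S2 */
      prod[i] = a_T * 7;		/* S4: prod_T = a_T * CONST */
    }
}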