gcc/tree-vect-patterns.c
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_pow_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern};
/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
                bool check_sign)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (! *def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
                           &dt))
    return false;

  return true;
}
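
/* Example (editor's illustration; the GIMPLE below is hypothetical):
   given

     short s_3;
     int i_5;
     i_5 = (int) s_3;     <-- DEF_STMT: NAME = NOP (name0)

   a call widened_name_p (i_5, use_stmt, &half_type, &def_stmt, true)
   returns true with HALF_TYPE == short int, since int is at least
   twice as wide as short and both types are signed.  */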
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
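
/* Example (editor's illustration; a hypothetical source loop that this
   pattern recognizes, assuming the target supports DOT_PROD_EXPR):

     short x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i] * y[i];    <-- S3-S7: widen, multiply, accumulate

   The widen-multiply-accumulate body is replaced by
   DOT_PROD_EXPR <x[i], y[i], sum>.  */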
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
               != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
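
/* Example (editor's illustration; hypothetical types): for

     char a_t;
     int res = (int) a_t << 9;

   the shift amount 9 exceeds the precision of char (8), so HALF_TYPE
   cannot be used directly; but int is 4 times wider than char and
   9 <= 16, so the promotion is rewritten to the intermediate 16-bit
   type and the shift becomes a short->int widening operation.  */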
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  if (op1_ok)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
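
/* Example (editor's illustration; hypothetical source): the loop

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];    <-- S3, S4: widening casts; S5: product

   matches the first form above and is replaced by
   WIDEN_MULT_EXPR <a[i], b[i]> where the target supports a widening
   multiply.  */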
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}
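
/* Example (editor's illustration): calls with the constant exponents
   handled above are rewritten as

     t = pow (x, 2.0);   -->   t = x * x;
     t = pow (x, 0.5);   -->   t = sqrt (x);

   so the loop body no longer needs a vectorizable pow.  */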
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
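
/* Example (editor's illustration; hypothetical source): the loop

     short x[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i];           <-- S2: widening cast; S3: summation

   matches the pattern above and is replaced by WIDEN_SUM <x[i], sum>.  */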
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can be
   either 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  struct loop *loop = (gimple_bb (stmt))->loop_father;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type promotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
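
/* Example (editor's illustration; hypothetical source): in

     unsigned char a[N], b[N];
     for (i = 0; i < N; i++)
       b[i] = (a[i] << 2) | 5;

   the C integer promotions perform the shift and the ior on int, with a
   demotion back to char on the store.  The pattern above detects that
   both operations fit a narrower type, so the loop can use the full
   vector width for char-sized elements instead of int-sized ones.  */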
/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t;
     unsigned TYPE u_res_T;
     TYPE a_T, res_T;

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;
     S4  u_res_T = (unsigned TYPE) res_T;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S3  res_T = a_T << CONST;
           '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as an over-widening operation (it
         can't be any other pattern, because only over-widening detects
         shifts).  LAST_STMT is the final type demotion statement, but its
         related statement is a shift.  We analyze the related statement to
         catch cases:

         orig code:
           type a_t;
           itype res;
           TYPE a_T, res_T;

           S1 a_T = (TYPE) a_t;
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;

           (size of type * 2 <= size of itype
            and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

           S1 a_T = (TYPE) a_t;
                 --> a_it = (itype) a_t;
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;  <--- LAST_STMT
                 --> res = a_it << CONST;

         after widen_shift:

           S1 a_T = (TYPE) a_t;
                 --> a_it = (itype) a_t; - redundant
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;
                 --> res = a_t w<< CONST;

      i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);
              nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
            return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
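
/* Example (editor's illustration; hypothetical source): the loop

     short a[N];
     int r[N];
     for (i = 0; i < N; i++)
       r[i] = a[i] << 8;      <-- S2: widening cast; S3: shift

   matches the first form above: 8 fits the 16-bit half type and int is
   exactly twice as wide, so the sequence is replaced by
   WIDEN_LSHIFT_EXPR <a[i], 8> on targets where
   supportable_widening_operation accepts it.  */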
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     type a_t;
     TYPE b_T, res_T;

     S1 a_t = ;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   where type 'TYPE' is a type with different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases:

     type a_t;
     TYPE b_T, c_T, res_T;

     S0 c_T = ;
     S1 a_t = (type) c_T;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   with a shift/rotate which has same type on both operands, in the
   second case just b_T op c_T, in the first case with added cast
   from a_t to c_T in STMT_VINFO_PATTERN_DEF_STMT.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
   S3 stmt.  */
static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
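
/* Example (editor's illustration, at the GIMPLE level; names are
   hypothetical):

     unsigned int b_5, res_6;
     unsigned char a_3;
     res_6 = b_5 << a_3;

   The shift amount a_3 has a different mode than b_5, which vector by
   vector shifts cannot handle directly; the pattern records
   a_4 = (unsigned int) a_3 as the pattern def stmt and rewrites the
   shift as res_6 = b_5 << a_4, with matching operand types.  */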
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
     Additionally a def_stmt is added.

     a_it = x_t CMP y_t ? b_it : c_it;
     a_T = (TYPE) a_it;  */
static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)
    return NULL;

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                          TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if (!int_fits_type_p (then_clause, itype)
          || !int_fits_type_p (else_clause, itype))
        return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
                                     vect_recog_temp_ssa_var (itype, NULL),
                                     unshare_expr (cond_expr),
                                     fold_convert (itype, then_clause),
                                     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
}
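
/* Example (editor's illustration; hypothetical source): in

     char a[N];
     int x[N], y[N];
     for (i = 0; i < N; i++)
       a[i] = x[i] < y[i] ? 2 : 3;

   the comparison is on int while the result is char, so the COND_EXPR
   mixes two vector sizes.  The pattern first computes the COND_EXPR in
   an int-sized temporary (the added def_stmt) and then converts it to
   char, so each statement operates on a single vector size.  */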
/* Helper function of vect_recog_bool_pattern.  Called recursively; returns
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}
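
/* Example (editor's illustration; hypothetical source): for

     _Bool b = (p[i] < q[i]) & (r[i] != 0);

   check_bool_pattern walks the def chain of b through the BIT_AND_EXPR
   and verifies that each leaf comparison can be carried out as a vector
   condition (expand_vec_cond_expr_p) on a suitable integer type.  */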
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_STMT and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (pattern_stmt),
                                    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
1787 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
1788 recursively. VAR is an SSA_NAME that should be transformed from bool
1789 to a wider integer type, OUT_TYPE is the desired final integer type of
1790 the whole pattern, TRUEVAL should be NULL unless optimizing
1791 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
1792 in the then_clause, STMTS is where statements with added pattern stmts
1793 should be pushed to. */
1795 static tree
1796 adjust_bool_pattern (tree var, tree out_type, tree trueval,
1797 VEC (gimple, heap) **stmts)
1799 gimple stmt = SSA_NAME_DEF_STMT (var);
1800 enum tree_code rhs_code, def_rhs_code;
1801 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
1802 location_t loc;
1803 gimple pattern_stmt, def_stmt;
1805 rhs1 = gimple_assign_rhs1 (stmt);
1806 rhs2 = gimple_assign_rhs2 (stmt);
1807 rhs_code = gimple_assign_rhs_code (stmt);
1808 loc = gimple_location (stmt);
1809 switch (rhs_code)
1811 case SSA_NAME:
1812 CASE_CONVERT:
1813 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1814 itype = TREE_TYPE (irhs1);
1815 pattern_stmt
1816 = gimple_build_assign_with_ops (SSA_NAME,
1817 vect_recog_temp_ssa_var (itype, NULL),
1818 irhs1, NULL_TREE);
1819 break;
1821 case BIT_NOT_EXPR:
1822 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1823 itype = TREE_TYPE (irhs1);
1824 pattern_stmt
1825 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
1826 vect_recog_temp_ssa_var (itype, NULL),
1827 irhs1, build_int_cst (itype, 1));
1828 break;
1830 case BIT_AND_EXPR:
1831 /* Try to optimize x = y & (a < b ? 1 : 0); into
1832 x = (a < b ? y : 0);
1834 E.g. for:
1835 bool a_b, b_b, c_b;
1836 TYPE d_T;
1838 S1 a_b = x1 CMP1 y1;
1839 S2 b_b = x2 CMP2 y2;
1840 S3 c_b = a_b & b_b;
1841 S4 d_T = (TYPE) c_b;
1843 we would normally emit:
1845 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1846 S2' b_T = x2 CMP2 y2 ? 1 : 0;
1847 S3' c_T = a_T & b_T;
1848 S4' d_T = c_T;
1850 but we can save one stmt by using the
1851 result of one of the COND_EXPRs in the other COND_EXPR and leave
1852 the BIT_AND_EXPR stmt out:
1854 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1855 S3' c_T = x2 CMP2 y2 ? a_T : 0;
1856 S4' d_T = c_T;
1858 At least when VEC_COND_EXPR is implemented using masks,
1859 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
1860 computes the comparison mask and ANDs it, in one case with an
1861 all-ones vector, in the other case with a vector register.
1862 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
1863 often more expensive. */
1864 def_stmt = SSA_NAME_DEF_STMT (rhs2);
1865 def_rhs_code = gimple_assign_rhs_code (def_stmt);
1866 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
1867 {
1868 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
1869 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1870 if (TYPE_PRECISION (TREE_TYPE (irhs1))
1871 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
1872 {
1873 gimple tstmt;
1874 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
1875 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
1876 tstmt = VEC_pop (gimple, *stmts);
1877 gcc_assert (tstmt == def_stmt);
1878 VEC_quick_push (gimple, *stmts, stmt);
1879 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
1880 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
1881 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
1882 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
1883 return irhs2;
1884 }
1885 else
1886 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1887 goto and_ior_xor;
1888 }
1889 def_stmt = SSA_NAME_DEF_STMT (rhs1);
1890 def_rhs_code = gimple_assign_rhs_code (def_stmt);
1891 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
1892 {
1893 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
1894 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1895 if (TYPE_PRECISION (TREE_TYPE (irhs2))
1896 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
1897 {
1898 gimple tstmt;
1899 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
1900 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
1901 tstmt = VEC_pop (gimple, *stmts);
1902 gcc_assert (tstmt == def_stmt);
1903 VEC_quick_push (gimple, *stmts, stmt);
1904 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
1905 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
1906 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
1907 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
1908 return irhs1;
1909 }
1910 else
1911 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1912 goto and_ior_xor;
1913 }
1914 /* FALLTHRU */
1915 case BIT_IOR_EXPR:
1916 case BIT_XOR_EXPR:
1917 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1918 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1919 and_ior_xor:
1920 if (TYPE_PRECISION (TREE_TYPE (irhs1))
1921 != TYPE_PRECISION (TREE_TYPE (irhs2)))
1922 {
1923 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
1924 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
1925 int out_prec = TYPE_PRECISION (out_type);
1926 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
1927 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
1928 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
1929 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
1930 else
1931 {
1932 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
1933 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
1934 }
1935 }
1936 itype = TREE_TYPE (irhs1);
1937 pattern_stmt
1938 = gimple_build_assign_with_ops (rhs_code,
1939 vect_recog_temp_ssa_var (itype, NULL),
1940 irhs1, irhs2);
1941 break;
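/* A sketch of the precision balancing above: if irhs1 has 32-bit
   precision, irhs2 has 8-bit precision and OUT_TYPE is a 32-bit type,
   then |32 - 32| < |32 - 8| and irhs2 is cast to the type of irhs1;
   when both operands are equally far from OUT_TYPE's precision, both
   are cast to OUT_TYPE. The casts go through adjust_bool_pattern_cast,
   so the operand's COND_EXPR moves to its PATTERN_DEF_STMT and the
   cast becomes its RELATED_STMT, rather than a standalone stmt. */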
1943 default:
1944 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
1945 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
1946 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1947 {
1948 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1949 itype
1950 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
1951 }
1952 else
1953 itype = TREE_TYPE (rhs1);
1954 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
1955 if (trueval == NULL_TREE)
1956 trueval = build_int_cst (itype, 1);
1957 else
1958 gcc_checking_assert (useless_type_conversion_p (itype,
1959 TREE_TYPE (trueval)));
1960 pattern_stmt
1961 = gimple_build_assign_with_ops3 (COND_EXPR,
1962 vect_recog_temp_ssa_var (itype, NULL),
1963 cond_expr, trueval,
1964 build_int_cst (itype, 0));
1965 break;
1966 }
1968 VEC_safe_push (gimple, heap, *stmts, stmt);
1969 gimple_set_location (pattern_stmt, loc);
1970 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1971 return gimple_assign_lhs (pattern_stmt);
1972 }
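/* A worked example of the recursion (a sketch; x1, y1 etc. are
   hypothetical): for

     S1 a_b = x1 < y1;
     S2 b_b = x2 < y2;
     S3 c_b = a_b & b_b;

   calling adjust_bool_pattern on c_b with OUT_TYPE int hits the
   BIT_AND_EXPR case, notices that S2's rhs is a comparison of
   int-sized operands, and ends up with

     S1' a_T = x1 < y1 ? 1 : 0;
     S3' c_T = x2 < y2 ? a_T : 0;

   returning c_T; *STMTS then contains S1 and S3, with S3 owning S3'
   as its related pattern stmt instead of S2. */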
1975 /* Function vect_recog_bool_pattern
1977 Try to find a pattern like the following:
1979 bool a_b, b_b, c_b, d_b, e_b;
1980 TYPE f_T;
1981 loop:
1982 S1 a_b = x1 CMP1 y1;
1983 S2 b_b = x2 CMP2 y2;
1984 S3 c_b = a_b & b_b;
1985 S4 d_b = x3 CMP3 y3;
1986 S5 e_b = c_b | d_b;
1987 S6 f_T = (TYPE) e_b;
1989 where type 'TYPE' is an integral type.
1991 Input:
1993 * LAST_STMT: The stmt from which the pattern
1994 search begins, i.e. the cast of a bool to
1995 an integer type.
1997 Output:
1999 * TYPE_IN: The type of the input arguments to the pattern.
2001 * TYPE_OUT: The type of the output of this pattern.
2003 * Return value: A new stmt that will be used to replace the pattern.
2005 Assuming the size of TYPE is the same as the size of all comparisons
2006 (otherwise some casts would be added where needed), for the above
2007 sequence we create the following related pattern stmts:
2008 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2009 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2010 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2011 S5' e_T = c_T | d_T;
2012 S6' f_T = e_T;
2014 Instead of the above S3' we could emit:
2015 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2016 S3' c_T = a_T & b_T;
2017 but the above is more efficient. */
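/* A source-level loop that could be gimplified into the above shape
   (a sketch; the array and bound names are made up):

     for (i = 0; i < n; i++)
       {
         bool a = x1[i] < y1[i], b = x2[i] < y2[i], d = x3[i] < y3[i];
         out[i] = (a & b) | d;
       }

   where out is an int array: the bool result of the bitwise ops is
   converted to int by S6, which is the LAST_STMT this recognizer
   starts from. */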
2019 static gimple
2020 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2021 tree *type_out)
2022 {
2023 gimple last_stmt = VEC_pop (gimple, *stmts);
2024 enum tree_code rhs_code;
2025 tree var, lhs, rhs, vectype;
2026 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2027 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2028 gimple pattern_stmt;
2030 if (!is_gimple_assign (last_stmt))
2031 return NULL;
2033 var = gimple_assign_rhs1 (last_stmt);
2034 lhs = gimple_assign_lhs (last_stmt);
2036 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2037 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2038 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2039 return NULL;
2041 rhs_code = gimple_assign_rhs_code (last_stmt);
2042 if (CONVERT_EXPR_CODE_P (rhs_code))
2043 {
2044 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE)
2045 return NULL;
2046 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2047 if (vectype == NULL_TREE)
2048 return NULL;
2050 if (!check_bool_pattern (var, loop_vinfo))
2051 return NULL;
2053 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2054 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2055 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2056 pattern_stmt
2057 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2058 else
2059 pattern_stmt
2060 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2061 *type_out = vectype;
2062 *type_in = vectype;
2063 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2064 return pattern_stmt;
2065 }
2066 else if (rhs_code == SSA_NAME
2067 && STMT_VINFO_DATA_REF (stmt_vinfo))
2068 {
2069 stmt_vec_info pattern_stmt_info;
2070 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2071 gcc_assert (vectype != NULL_TREE);
2072 if (!check_bool_pattern (var, loop_vinfo))
2073 return NULL;
2075 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2076 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2077 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2078 {
2079 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2080 gimple cast_stmt
2081 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2082 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
2083 rhs = rhs2;
2084 }
2085 pattern_stmt
2086 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2087 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2088 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2089 STMT_VINFO_DATA_REF (pattern_stmt_info)
2090 = STMT_VINFO_DATA_REF (stmt_vinfo);
2091 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2092 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2093 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2094 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2095 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2096 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2097 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2098 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2099 *type_out = vectype;
2100 *type_in = vectype;
2101 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2102 return pattern_stmt;
2103 }
2104 else
2105 return NULL;
2106 }
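/* The second branch above handles a bool stored to memory, e.g.
   (a sketch): out_b[i] = x[i] < y[i]; where out_b is a bool array.
   The bool value is widened to the element type of the store's
   vectype, the original lhs is wrapped in a VIEW_CONVERT_EXPR, and
   the data reference info of the original store is copied over to
   the new pattern stmt so it can take the store's place. */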
2109 /* Mark statements that are involved in a pattern. */
2111 static inline void
2112 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2113 tree pattern_vectype)
2114 {
2115 stmt_vec_info pattern_stmt_info, def_stmt_info;
2116 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2117 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2118 gimple def_stmt;
2120 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2121 if (pattern_stmt_info == NULL)
2122 {
2123 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2124 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2125 }
2126 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2131 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2132 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2133 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2134 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
2135 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
2136 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
2137 {
2138 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
2139 def_stmt_info = vinfo_for_stmt (def_stmt);
2140 if (def_stmt_info == NULL)
2141 {
2142 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
2143 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2144 }
2145 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2146 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2147 STMT_VINFO_DEF_TYPE (def_stmt_info)
2148 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2149 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2150 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2151 }
2152 }
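/* After this function the two stmts point at each other (a sketch):

     ORIG_STMT: in_pattern_p = true, related_stmt = PATTERN_STMT
     PATTERN_STMT: related_stmt = ORIG_STMT, vectype = PATTERN_VECTYPE

   and the pattern def stmt, if present, likewise gets a stmt_info
   whose related_stmt is ORIG_STMT. */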
2154 /* Function vect_pattern_recog_1
2156 Input:
2157 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2158 computation pattern.
2159 STMT: A stmt from which the pattern search should start.
2161 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
2162 expression that computes the same functionality and can be used to
2163 replace the sequence of stmts that are involved in the pattern.
2165 Output:
2166 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2167 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2168 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2169 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2170 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
2171 matches the output mode of the target instruction.
2173 This function also does some bookkeeping, as explained in the documentation
2174 for vect_pattern_recog. */
2176 static void
2177 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2178 gimple_stmt_iterator si,
2179 VEC (gimple, heap) **stmts_to_replace)
2180 {
2181 gimple stmt = gsi_stmt (si), pattern_stmt;
2182 stmt_vec_info stmt_info;
2183 loop_vec_info loop_vinfo;
2184 tree pattern_vectype;
2185 tree type_in, type_out;
2186 enum tree_code code;
2187 int i;
2188 gimple next;
2190 VEC_truncate (gimple, *stmts_to_replace, 0);
2191 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2192 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2193 if (!pattern_stmt)
2194 return;
2196 stmt = VEC_last (gimple, *stmts_to_replace);
2197 stmt_info = vinfo_for_stmt (stmt);
2198 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2200 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2201 {
2202 /* No need to check target support (already checked by the pattern
2203 recognition function). */
2204 pattern_vectype = type_out ? type_out : type_in;
2205 }
2206 else
2207 {
2208 enum machine_mode vec_mode;
2209 enum insn_code icode;
2210 optab optab;
2212 /* Check target support */
2213 type_in = get_vectype_for_scalar_type (type_in);
2214 if (!type_in)
2215 return;
2216 if (type_out)
2217 type_out = get_vectype_for_scalar_type (type_out);
2218 else
2219 type_out = type_in;
2220 if (!type_out)
2221 return;
2222 pattern_vectype = type_out;
2224 if (is_gimple_assign (pattern_stmt))
2225 code = gimple_assign_rhs_code (pattern_stmt);
2226 else
2227 {
2228 gcc_assert (is_gimple_call (pattern_stmt));
2229 code = CALL_EXPR;
2230 }
2232 optab = optab_for_tree_code (code, type_in, optab_default);
2233 vec_mode = TYPE_MODE (type_in);
2234 if (!optab
2235 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2236 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2237 return;
2238 }
2240 /* Found a vectorizable pattern. */
2241 if (vect_print_dump_info (REPORT_DETAILS))
2242 {
2243 fprintf (vect_dump, "pattern recognized: ");
2244 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2245 }
2247 /* Mark the stmts that are involved in the pattern. */
2248 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2250 /* Patterns cannot be vectorized using SLP, because they change the order of
2251 computation. */
2252 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2253 if (next == stmt)
2254 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2256 /* It is possible that additional pattern stmts are created and inserted in
2257 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2258 relevant statements. */
2259 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2260 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2261 i++)
2262 {
2263 stmt_info = vinfo_for_stmt (stmt);
2264 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2265 if (vect_print_dump_info (REPORT_DETAILS))
2266 {
2267 fprintf (vect_dump, "additional pattern stmt: ");
2268 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2269 }
2271 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2272 }
2273 }
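/* A sketch of the STMTS_TO_REPLACE contract assumed above: on return
   from the recognizer, the last element is the stmt that the main
   pattern stmt replaces, and any earlier elements are additional
   original stmts whose RELATED_STMT the recognizer has already set;
   the loop above only creates the remaining bookkeeping for those
   extra elements. */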
2276 /* Function vect_pattern_recog
2278 Input:
2279 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
2280 computation idioms.
2282 Output - for each computation idiom that is detected we create a new stmt
2283 that provides the same functionality and that can be vectorized. We
2284 also record some information in the stmt_vec_info of the relevant
2285 stmts, as explained below:
2287 At the entry to this function we have the following stmts, with the
2288 following initial value in the STMT_VINFO fields:
2290 stmt in_pattern_p related_stmt vec_stmt
2291 S1: a_i = .... - - -
2292 S2: a_2 = ..use(a_i).. - - -
2293 S3: a_1 = ..use(a_2).. - - -
2294 S4: a_0 = ..use(a_1).. - - -
2295 S5: ... = ..use(a_0).. - - -
2297 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2298 represented by a single stmt. We then:
2299 - create a new stmt S6 equivalent to the pattern (the stmt is not
2300 inserted into the code)
2301 - fill in the STMT_VINFO fields as follows:
2303 in_pattern_p related_stmt vec_stmt
2304 S1: a_i = .... - - -
2305 S2: a_2 = ..use(a_i).. - - -
2306 S3: a_1 = ..use(a_2).. - - -
2307 S4: a_0 = ..use(a_1).. true S6 -
2308 '---> S6: a_new = .... - S4 -
2309 S5: ... = ..use(a_0).. - - -
2311 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2312 to each other through the RELATED_STMT field).
2314 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2315 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2316 remain irrelevant unless used by stmts other than S4.
2318 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2319 (because they are marked as irrelevant). It will vectorize S6, and record
2320 a pointer to the new vector stmt VS6 from S6 (as usual).
2321 S4 will be skipped, and S5 will be vectorized as usual:
2323 in_pattern_p related_stmt vec_stmt
2324 S1: a_i = .... - - -
2325 S2: a_2 = ..use(a_i).. - - -
2326 S3: a_1 = ..use(a_2).. - - -
2327 > VS6: va_new = .... - - -
2328 S4: a_0 = ..use(a_1).. true S6 VS6
2329 '---> S6: a_new = .... - S4 VS6
2330 > VS5: ... = ..vuse(va_new).. - - -
2331 S5: ... = ..use(a_0).. - - -
2333 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2334 elsewhere), and we'll end up with:
2336 VS6: va_new = ....
2337 VS5: ... = ..vuse(va_new)..
2339 In case of more than one pattern statement, e.g., widen-mult with
2340 intermediate type:
2342 S1 a_t = ;
2343 S2 a_T = (TYPE) a_t;
2344 '--> S3: a_it = (interm_type) a_t;
2345 S4 prod_T = a_T * CONST;
2346 '--> S5: prod_T' = a_it w* CONST;
2348 there may be other users of a_T outside the pattern. In that case S2 will
2349 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2350 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2351 be recorded in S3. */
2353 void
2354 vect_pattern_recog (loop_vec_info loop_vinfo)
2355 {
2356 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2357 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2358 unsigned int nbbs = loop->num_nodes;
2359 gimple_stmt_iterator si;
2360 unsigned int i, j;
2361 vect_recog_func_ptr vect_recog_func;
2362 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "=== vect_pattern_recog ===");
2367 /* Scan through the loop stmts, applying the pattern recognition
2368 functions starting at each stmt visited: */
2369 for (i = 0; i < nbbs; i++)
2370 {
2371 basic_block bb = bbs[i];
2372 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2373 {
2374 /* Scan over all generic vect_recog_xxx_pattern functions. */
2375 for (j = 0; j < NUM_PATTERNS; j++)
2376 {
2377 vect_recog_func = vect_vect_recog_func_ptrs[j];
2378 vect_pattern_recog_1 (vect_recog_func, si,
2379 &stmts_to_replace);
2380 }
2381 }
2382 }
2384 VEC_free (gimple, heap, stmts_to_replace);
2385 }