/* gcc/tree-vect-patterns.c (from official-gcc.git,
   blob 7fc107cafa9c83d2028d76ab8daffdbca9fe06bf).  */
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
42 /* Pattern recognition functions */
43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
44 tree *);
45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
46 tree *);
47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
48 tree *);
49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
51 tree *);
52 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
53 vect_recog_widen_mult_pattern,
54 vect_recog_widen_sum_pattern,
55 vect_recog_dot_prod_pattern,
56 vect_recog_pow_pattern,
57 vect_recog_over_widening_pattern};
60 /* Function widened_name_p
62 Check whether NAME, an ssa-name used in USE_STMT,
63 is a result of a type-promotion, such that:
64 DEF_STMT: NAME = NOP (name0)
65 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
66 If CHECK_SIGN is TRUE, check that either both types are signed or both are
67 unsigned. */
69 static bool
70 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
71 bool check_sign)
73 tree dummy;
74 gimple dummy_gimple;
75 loop_vec_info loop_vinfo;
76 stmt_vec_info stmt_vinfo;
77 tree type = TREE_TYPE (name);
78 tree oprnd0;
79 enum vect_def_type dt;
80 tree def;
82 stmt_vinfo = vinfo_for_stmt (use_stmt);
83 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
85 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
86 return false;
88 if (dt != vect_internal_def
89 && dt != vect_external_def && dt != vect_constant_def)
90 return false;
92 if (! *def_stmt)
93 return false;
95 if (!is_gimple_assign (*def_stmt))
96 return false;
98 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
99 return false;
101 oprnd0 = gimple_assign_rhs1 (*def_stmt);
103 *half_type = TREE_TYPE (oprnd0);
104 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
105 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
106 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
107 return false;
109 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
110 &dt))
111 return false;
113 return true;
116 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
117 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
119 static tree
120 vect_recog_temp_ssa_var (tree type, gimple stmt)
122 tree var = create_tmp_var (type, "patt");
124 add_referenced_var (var);
125 var = make_ssa_name (var, stmt);
126 return var;
129 /* Function vect_recog_dot_prod_pattern
131 Try to find the following pattern:
133 type x_t, y_t;
134 TYPE1 prod;
135 TYPE2 sum = init;
136 loop:
137 sum_0 = phi <init, sum_1>
138 S1 x_t = ...
139 S2 y_t = ...
140 S3 x_T = (TYPE1) x_t;
141 S4 y_T = (TYPE1) y_t;
142 S5 prod = x_T * y_T;
143 [S6 prod = (TYPE2) prod; #optional]
144 S7 sum_1 = prod + sum_0;
146 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
147 same size of 'TYPE1' or bigger. This is a special case of a reduction
148 computation.
150 Input:
152 * STMTS: Contains a stmt from which the pattern search begins. In the
153 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
154 will be detected.
156 Output:
158 * TYPE_IN: The type of the input arguments to the pattern.
160 * TYPE_OUT: The type of the output of this pattern.
162 * Return value: A new stmt that will be used to replace the sequence of
163 stmts that constitute the pattern. In this case it will be:
164 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
166 Note: The dot-prod idiom is a widening reduction pattern that is
167 vectorized without preserving all the intermediate results. It
168 produces only N/2 (widened) results (by summing up pairs of
169 intermediate results) rather than all N results. Therefore, we
170 cannot allow this pattern when we want to get all the results and in
171 the correct order (as is the case when this computation is in an
172 inner-loop nested in an outer-loop that us being vectorized). */
174 static gimple
175 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
176 tree *type_out)
178 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
179 tree oprnd0, oprnd1;
180 tree oprnd00, oprnd01;
181 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
182 tree type, half_type;
183 gimple pattern_stmt;
184 tree prod_type;
185 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
186 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
187 tree var;
189 if (!is_gimple_assign (last_stmt))
190 return NULL;
192 type = gimple_expr_type (last_stmt);
194 /* Look for the following pattern
195 DX = (TYPE1) X;
196 DY = (TYPE1) Y;
197 DPROD = DX * DY;
198 DDPROD = (TYPE2) DPROD;
199 sum_1 = DDPROD + sum_0;
200 In which
201 - DX is double the size of X
202 - DY is double the size of Y
203 - DX, DY, DPROD all have the same type
204 - sum is the same size of DPROD or bigger
205 - sum has been recognized as a reduction variable.
207 This is equivalent to:
208 DPROD = X w* Y; #widen mult
209 sum_1 = DPROD w+ sum_0; #widen summation
211 DPROD = X w* Y; #widen mult
212 sum_1 = DPROD + sum_0; #summation
215 /* Starting from LAST_STMT, follow the defs of its uses in search
216 of the above pattern. */
218 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
219 return NULL;
221 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
223 /* Has been detected as widening-summation? */
225 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
226 type = gimple_expr_type (stmt);
227 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
228 return NULL;
229 oprnd0 = gimple_assign_rhs1 (stmt);
230 oprnd1 = gimple_assign_rhs2 (stmt);
231 half_type = TREE_TYPE (oprnd0);
233 else
235 gimple def_stmt;
237 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
238 return NULL;
239 oprnd0 = gimple_assign_rhs1 (last_stmt);
240 oprnd1 = gimple_assign_rhs2 (last_stmt);
241 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
242 || !types_compatible_p (TREE_TYPE (oprnd1), type))
243 return NULL;
244 stmt = last_stmt;
246 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
248 stmt = def_stmt;
249 oprnd0 = gimple_assign_rhs1 (stmt);
251 else
252 half_type = type;
255 /* So far so good. Since last_stmt was detected as a (summation) reduction,
256 we know that oprnd1 is the reduction variable (defined by a loop-header
257 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
258 Left to check that oprnd0 is defined by a (widen_)mult_expr */
259 if (TREE_CODE (oprnd0) != SSA_NAME)
260 return NULL;
262 prod_type = half_type;
263 stmt = SSA_NAME_DEF_STMT (oprnd0);
265 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
266 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
267 return NULL;
269 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
270 inside the loop (in case we are analyzing an outer-loop). */
271 if (!is_gimple_assign (stmt))
272 return NULL;
273 stmt_vinfo = vinfo_for_stmt (stmt);
274 gcc_assert (stmt_vinfo);
275 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
276 return NULL;
277 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
278 return NULL;
279 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
281 /* Has been detected as a widening multiplication? */
283 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
284 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
285 return NULL;
286 stmt_vinfo = vinfo_for_stmt (stmt);
287 gcc_assert (stmt_vinfo);
288 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
289 oprnd00 = gimple_assign_rhs1 (stmt);
290 oprnd01 = gimple_assign_rhs2 (stmt);
292 else
294 tree half_type0, half_type1;
295 gimple def_stmt;
296 tree oprnd0, oprnd1;
298 oprnd0 = gimple_assign_rhs1 (stmt);
299 oprnd1 = gimple_assign_rhs2 (stmt);
300 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
301 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
302 return NULL;
303 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
304 return NULL;
305 oprnd00 = gimple_assign_rhs1 (def_stmt);
306 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
307 return NULL;
308 oprnd01 = gimple_assign_rhs1 (def_stmt);
309 if (!types_compatible_p (half_type0, half_type1))
310 return NULL;
311 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
312 return NULL;
315 half_type = TREE_TYPE (oprnd00);
316 *type_in = half_type;
317 *type_out = type;
319 /* Pattern detected. Create a stmt to be used to replace the pattern: */
320 var = vect_recog_temp_ssa_var (type, NULL);
321 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
322 oprnd00, oprnd01, oprnd1);
324 if (vect_print_dump_info (REPORT_DETAILS))
326 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
327 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
330 /* We don't allow changing the order of the computation in the inner-loop
331 when doing outer-loop vectorization. */
332 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
334 return pattern_stmt;
338 /* Handle two cases of multiplication by a constant. The first one is when
339 the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
340 operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
341 TYPE.
343 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
344 HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
345 TYPE), we can perform widen-mult from the intermediate type to TYPE and
346 replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
348 static bool
349 vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
350 VEC (gimple, heap) **stmts, tree type,
351 tree *half_type, gimple def_stmt)
353 tree new_type, new_oprnd, tmp;
354 gimple new_stmt;
355 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
356 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
358 if (int_fits_type_p (const_oprnd, *half_type))
360 /* CONST_OPRND is a constant of HALF_TYPE. */
361 *oprnd = gimple_assign_rhs1 (def_stmt);
362 return true;
365 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
366 || !gimple_bb (def_stmt)
367 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
368 || !vinfo_for_stmt (def_stmt))
369 return false;
371 /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
372 a type 2 times bigger than HALF_TYPE. */
373 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
374 TYPE_UNSIGNED (type));
375 if (!int_fits_type_p (const_oprnd, new_type))
376 return false;
378 /* Use NEW_TYPE for widen_mult. */
379 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
381 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
382 /* Check if the already created pattern stmt is what we need. */
383 if (!is_gimple_assign (new_stmt)
384 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
385 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
386 return false;
388 *oprnd = gimple_assign_lhs (new_stmt);
390 else
392 /* Create a_T = (NEW_TYPE) a_t; */
393 *oprnd = gimple_assign_rhs1 (def_stmt);
394 tmp = create_tmp_var (new_type, NULL);
395 add_referenced_var (tmp);
396 new_oprnd = make_ssa_name (tmp, NULL);
397 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
398 NULL_TREE);
399 SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
400 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
401 VEC_safe_push (gimple, heap, *stmts, def_stmt);
402 *oprnd = new_oprnd;
405 *half_type = new_type;
406 return true;
410 /* Function vect_recog_widen_mult_pattern
412 Try to find the following pattern:
414 type a_t, b_t;
415 TYPE a_T, b_T, prod_T;
417 S1 a_t = ;
418 S2 b_t = ;
419 S3 a_T = (TYPE) a_t;
420 S4 b_T = (TYPE) b_t;
421 S5 prod_T = a_T * b_T;
423 where type 'TYPE' is at least double the size of type 'type'.
425 Also detect unsgigned cases:
427 unsigned type a_t, b_t;
428 unsigned TYPE u_prod_T;
429 TYPE a_T, b_T, prod_T;
431 S1 a_t = ;
432 S2 b_t = ;
433 S3 a_T = (TYPE) a_t;
434 S4 b_T = (TYPE) b_t;
435 S5 prod_T = a_T * b_T;
436 S6 u_prod_T = (unsigned TYPE) prod_T;
438 and multiplication by constants:
440 type a_t;
441 TYPE a_T, prod_T;
443 S1 a_t = ;
444 S3 a_T = (TYPE) a_t;
445 S5 prod_T = a_T * CONST;
447 A special case of multiplication by constants is when 'TYPE' is 4 times
448 bigger than 'type', but CONST fits an intermediate type 2 times smaller
449 than 'TYPE'. In that case we create an additional pattern stmt for S3
450 to create a variable of the intermediate type, and perform widen-mult
451 on the intermediate type as well:
453 type a_t;
454 interm_type a_it;
455 TYPE a_T, prod_T, prod_T';
457 S1 a_t = ;
458 S3 a_T = (TYPE) a_t;
459 '--> a_it = (interm_type) a_t;
460 S5 prod_T = a_T * CONST;
461 '--> prod_T' = a_it w* CONST;
463 Input/Output:
465 * STMTS: Contains a stmt from which the pattern search begins. In the
466 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
467 is detected. In case of unsigned widen-mult, the original stmt (S5) is
468 replaced with S6 in STMTS. In case of multiplication by a constant
469 of an intermediate type (the last case above), STMTS also contains S3
470 (inserted before S5).
472 Output:
474 * TYPE_IN: The type of the input arguments to the pattern.
476 * TYPE_OUT: The type of the output of this pattern.
478 * Return value: A new stmt that will be used to replace the sequence of
479 stmts that constitute the pattern. In this case it will be:
480 WIDEN_MULT <a_t, b_t>
483 static gimple
484 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
485 tree *type_in, tree *type_out)
487 gimple last_stmt = VEC_pop (gimple, *stmts);
488 gimple def_stmt0, def_stmt1;
489 tree oprnd0, oprnd1;
490 tree type, half_type0, half_type1;
491 gimple pattern_stmt;
492 tree vectype, vectype_out = NULL_TREE;
493 tree dummy;
494 tree var;
495 enum tree_code dummy_code;
496 int dummy_int;
497 VEC (tree, heap) *dummy_vec;
498 bool op0_ok, op1_ok;
500 if (!is_gimple_assign (last_stmt))
501 return NULL;
503 type = gimple_expr_type (last_stmt);
505 /* Starting from LAST_STMT, follow the defs of its uses in search
506 of the above pattern. */
508 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
509 return NULL;
511 oprnd0 = gimple_assign_rhs1 (last_stmt);
512 oprnd1 = gimple_assign_rhs2 (last_stmt);
513 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
514 || !types_compatible_p (TREE_TYPE (oprnd1), type))
515 return NULL;
517 /* Check argument 0. */
518 op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
519 /* Check argument 1. */
520 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
522 /* In case of multiplication by a constant one of the operands may not match
523 the pattern, but not both. */
524 if (!op0_ok && !op1_ok)
525 return NULL;
527 if (op0_ok && op1_ok)
529 oprnd0 = gimple_assign_rhs1 (def_stmt0);
530 oprnd1 = gimple_assign_rhs1 (def_stmt1);
532 else if (!op0_ok)
534 if (TREE_CODE (oprnd0) == INTEGER_CST
535 && TREE_CODE (half_type1) == INTEGER_TYPE
536 && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
537 stmts, type,
538 &half_type1, def_stmt1))
539 half_type0 = half_type1;
540 else
541 return NULL;
543 else if (!op1_ok)
545 if (TREE_CODE (oprnd1) == INTEGER_CST
546 && TREE_CODE (half_type0) == INTEGER_TYPE
547 && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
548 stmts, type,
549 &half_type0, def_stmt0))
550 half_type1 = half_type0;
551 else
552 return NULL;
555 /* Handle unsigned case. Look for
556 S6 u_prod_T = (unsigned TYPE) prod_T;
557 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
558 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
560 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
561 imm_use_iterator imm_iter;
562 use_operand_p use_p;
563 int nuses = 0;
564 gimple use_stmt = NULL;
565 tree use_type;
567 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
568 return NULL;
570 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
572 if (is_gimple_debug (USE_STMT (use_p)))
573 continue;
574 use_stmt = USE_STMT (use_p);
575 nuses++;
578 if (nuses != 1 || !is_gimple_assign (use_stmt)
579 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
580 return NULL;
582 use_lhs = gimple_assign_lhs (use_stmt);
583 use_type = TREE_TYPE (use_lhs);
584 if (!INTEGRAL_TYPE_P (use_type)
585 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
586 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
587 return NULL;
589 type = use_type;
590 last_stmt = use_stmt;
593 if (!types_compatible_p (half_type0, half_type1))
594 return NULL;
596 /* Pattern detected. */
597 if (vect_print_dump_info (REPORT_DETAILS))
598 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
600 /* Check target support */
601 vectype = get_vectype_for_scalar_type (half_type0);
602 vectype_out = get_vectype_for_scalar_type (type);
603 if (!vectype
604 || !vectype_out
605 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
606 vectype_out, vectype,
607 &dummy, &dummy, &dummy_code,
608 &dummy_code, &dummy_int, &dummy_vec))
609 return NULL;
611 *type_in = vectype;
612 *type_out = vectype_out;
614 /* Pattern supported. Create a stmt to be used to replace the pattern: */
615 var = vect_recog_temp_ssa_var (type, NULL);
616 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
617 oprnd1);
618 SSA_NAME_DEF_STMT (var) = pattern_stmt;
620 if (vect_print_dump_info (REPORT_DETAILS))
621 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
623 VEC_safe_push (gimple, heap, *stmts, last_stmt);
624 return pattern_stmt;
628 /* Function vect_recog_pow_pattern
630 Try to find the following pattern:
632 x = POW (y, N);
634 with POW being one of pow, powf, powi, powif and N being
635 either 2 or 0.5.
637 Input:
639 * LAST_STMT: A stmt from which the pattern search begins.
641 Output:
643 * TYPE_IN: The type of the input arguments to the pattern.
645 * TYPE_OUT: The type of the output of this pattern.
647 * Return value: A new stmt that will be used to replace the sequence of
648 stmts that constitute the pattern. In this case it will be:
649 x = x * x
651 x = sqrt (x)
654 static gimple
655 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
656 tree *type_out)
658 gimple last_stmt = VEC_index (gimple, *stmts, 0);
659 tree fn, base, exp = NULL;
660 gimple stmt;
661 tree var;
663 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
664 return NULL;
666 fn = gimple_call_fndecl (last_stmt);
667 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
668 return NULL;
670 switch (DECL_FUNCTION_CODE (fn))
672 case BUILT_IN_POWIF:
673 case BUILT_IN_POWI:
674 case BUILT_IN_POWF:
675 case BUILT_IN_POW:
676 base = gimple_call_arg (last_stmt, 0);
677 exp = gimple_call_arg (last_stmt, 1);
678 if (TREE_CODE (exp) != REAL_CST
679 && TREE_CODE (exp) != INTEGER_CST)
680 return NULL;
681 break;
683 default:
684 return NULL;
687 /* We now have a pow or powi builtin function call with a constant
688 exponent. */
690 *type_out = NULL_TREE;
692 /* Catch squaring. */
693 if ((host_integerp (exp, 0)
694 && tree_low_cst (exp, 0) == 2)
695 || (TREE_CODE (exp) == REAL_CST
696 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
698 *type_in = TREE_TYPE (base);
700 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
701 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
702 SSA_NAME_DEF_STMT (var) = stmt;
703 return stmt;
706 /* Catch square root. */
707 if (TREE_CODE (exp) == REAL_CST
708 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
710 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
711 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
712 if (*type_in)
714 gimple stmt = gimple_build_call (newfn, 1, base);
715 if (vectorizable_function (stmt, *type_in, *type_in)
716 != NULL_TREE)
718 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
719 gimple_call_set_lhs (stmt, var);
720 return stmt;
725 return NULL;
729 /* Function vect_recog_widen_sum_pattern
731 Try to find the following pattern:
733 type x_t;
734 TYPE x_T, sum = init;
735 loop:
736 sum_0 = phi <init, sum_1>
737 S1 x_t = *p;
738 S2 x_T = (TYPE) x_t;
739 S3 sum_1 = x_T + sum_0;
741 where type 'TYPE' is at least double the size of type 'type', i.e - we're
742 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
743 a special case of a reduction computation.
745 Input:
747 * LAST_STMT: A stmt from which the pattern search begins. In the example,
748 when this function is called with S3, the pattern {S2,S3} will be detected.
750 Output:
752 * TYPE_IN: The type of the input arguments to the pattern.
754 * TYPE_OUT: The type of the output of this pattern.
756 * Return value: A new stmt that will be used to replace the sequence of
757 stmts that constitute the pattern. In this case it will be:
758 WIDEN_SUM <x_t, sum_0>
760 Note: The widening-sum idiom is a widening reduction pattern that is
761 vectorized without preserving all the intermediate results. It
762 produces only N/2 (widened) results (by summing up pairs of
763 intermediate results) rather than all N results. Therefore, we
764 cannot allow this pattern when we want to get all the results and in
765 the correct order (as is the case when this computation is in an
766 inner-loop nested in an outer-loop that us being vectorized). */
768 static gimple
769 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
770 tree *type_out)
772 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
773 tree oprnd0, oprnd1;
774 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
775 tree type, half_type;
776 gimple pattern_stmt;
777 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
778 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
779 tree var;
781 if (!is_gimple_assign (last_stmt))
782 return NULL;
784 type = gimple_expr_type (last_stmt);
786 /* Look for the following pattern
787 DX = (TYPE) X;
788 sum_1 = DX + sum_0;
789 In which DX is at least double the size of X, and sum_1 has been
790 recognized as a reduction variable.
793 /* Starting from LAST_STMT, follow the defs of its uses in search
794 of the above pattern. */
796 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
797 return NULL;
799 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
800 return NULL;
802 oprnd0 = gimple_assign_rhs1 (last_stmt);
803 oprnd1 = gimple_assign_rhs2 (last_stmt);
804 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
805 || !types_compatible_p (TREE_TYPE (oprnd1), type))
806 return NULL;
808 /* So far so good. Since last_stmt was detected as a (summation) reduction,
809 we know that oprnd1 is the reduction variable (defined by a loop-header
810 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
811 Left to check that oprnd0 is defined by a cast from type 'type' to type
812 'TYPE'. */
814 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
815 return NULL;
817 oprnd0 = gimple_assign_rhs1 (stmt);
818 *type_in = half_type;
819 *type_out = type;
821 /* Pattern detected. Create a stmt to be used to replace the pattern: */
822 var = vect_recog_temp_ssa_var (type, NULL);
823 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
824 oprnd0, oprnd1);
825 SSA_NAME_DEF_STMT (var) = pattern_stmt;
827 if (vect_print_dump_info (REPORT_DETAILS))
829 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
830 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
833 /* We don't allow changing the order of the computation in the inner-loop
834 when doing outer-loop vectorization. */
835 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
837 return pattern_stmt;
841 /* Return TRUE if the operation in STMT can be performed on a smaller type.
843 Input:
844 STMT - a statement to check.
845 DEF - we support operations with two operands, one of which is constant.
846 The other operand can be defined by a demotion operation, or by a
847 previous statement in a sequence of over-promoted operations. In the
848 later case DEF is used to replace that operand. (It is defined by a
849 pattern statement we created for the previous statement in the
850 sequence).
852 Input/output:
853 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
854 NULL, it's the type of DEF.
855 STMTS - additional pattern statements. If a pattern statement (type
856 conversion) is created in this function, its original statement is
857 added to STMTS.
859 Output:
860 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
861 operands to use in the new pattern statement for STMT (will be created
862 in vect_recog_over_widening_pattern ()).
863 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
864 statements for STMT: the first one is a type promotion and the second
865 one is the operation itself. We return the type promotion statement
866 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
867 the second pattern statement. */
869 static bool
870 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
871 tree *op0, tree *op1, gimple *new_def_stmt,
872 VEC (gimple, heap) **stmts)
874 enum tree_code code;
875 tree const_oprnd, oprnd;
876 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
877 gimple def_stmt, new_stmt;
878 bool first = false;
879 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
880 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
882 *new_def_stmt = NULL;
884 if (!is_gimple_assign (stmt))
885 return false;
887 code = gimple_assign_rhs_code (stmt);
888 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
889 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
890 return false;
892 oprnd = gimple_assign_rhs1 (stmt);
893 const_oprnd = gimple_assign_rhs2 (stmt);
894 type = gimple_expr_type (stmt);
896 if (TREE_CODE (oprnd) != SSA_NAME
897 || TREE_CODE (const_oprnd) != INTEGER_CST)
898 return false;
900 /* If we are in the middle of a sequence, we use DEF from a previous
901 statement. Otherwise, OPRND has to be a result of type promotion. */
902 if (*new_type)
904 half_type = *new_type;
905 oprnd = def;
907 else
909 first = true;
910 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
911 || !gimple_bb (def_stmt)
912 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
913 || !vinfo_for_stmt (def_stmt))
914 return false;
917 /* Can we perform the operation on a smaller type? */
918 switch (code)
920 case BIT_IOR_EXPR:
921 case BIT_XOR_EXPR:
922 case BIT_AND_EXPR:
923 if (!int_fits_type_p (const_oprnd, half_type))
925 /* HALF_TYPE is not enough. Try a bigger type if possible. */
926 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
927 return false;
929 interm_type = build_nonstandard_integer_type (
930 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
931 if (!int_fits_type_p (const_oprnd, interm_type))
932 return false;
935 break;
937 case LSHIFT_EXPR:
938 /* Try intermediate type - HALF_TYPE is not enough for sure. */
939 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
940 return false;
942 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
943 (e.g., if the original value was char, the shift amount is at most 8
944 if we want to use short). */
945 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
946 return false;
948 interm_type = build_nonstandard_integer_type (
949 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
951 if (!vect_supportable_shift (code, interm_type))
952 return false;
954 break;
956 case RSHIFT_EXPR:
957 if (vect_supportable_shift (code, half_type))
958 break;
960 /* Try intermediate type - HALF_TYPE is not supported. */
961 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
962 return false;
964 interm_type = build_nonstandard_integer_type (
965 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
967 if (!vect_supportable_shift (code, interm_type))
968 return false;
970 break;
972 default:
973 gcc_unreachable ();
976 /* There are four possible cases:
977 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
978 the first statement in the sequence)
979 a. The original, HALF_TYPE, is not enough - we replace the promotion
980 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
981 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
982 promotion.
983 2. OPRND is defined by a pattern statement we created.
984 a. Its type is not sufficient for the operation, we create a new stmt:
985 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
986 this statement in NEW_DEF_STMT, and it is later put in
987 STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
988 b. OPRND is good to use in the new statement. */
989 if (first)
991 if (interm_type)
993 /* Replace the original type conversion HALF_TYPE->TYPE with
994 HALF_TYPE->INTERM_TYPE. */
995 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
997 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
998 /* Check if the already created pattern stmt is what we need. */
999 if (!is_gimple_assign (new_stmt)
1000 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1001 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1002 return false;
1004 oprnd = gimple_assign_lhs (new_stmt);
1006 else
1008 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1009 oprnd = gimple_assign_rhs1 (def_stmt);
1010 tmp = create_tmp_reg (interm_type, NULL);
1011 add_referenced_var (tmp);
1012 new_oprnd = make_ssa_name (tmp, NULL);
1013 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1014 oprnd, NULL_TREE);
1015 SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
1016 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1017 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1018 oprnd = new_oprnd;
1021 else
1023 /* Retrieve the operand before the type promotion. */
1024 oprnd = gimple_assign_rhs1 (def_stmt);
1027 else
1029 if (interm_type)
1031 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1032 tmp = create_tmp_reg (interm_type, NULL);
1033 add_referenced_var (tmp);
1034 new_oprnd = make_ssa_name (tmp, NULL);
1035 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1036 oprnd, NULL_TREE);
1037 SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
1038 oprnd = new_oprnd;
1039 *new_def_stmt = new_stmt;
1042 /* Otherwise, OPRND is already set. */
1045 if (interm_type)
1046 *new_type = interm_type;
1047 else
1048 *new_type = half_type;
1050 *op0 = oprnd;
1051 *op1 = fold_convert (*new_type, const_oprnd);
1053 return true;
1057 /* Try to find a statement or a sequence of statements that can be performed
1058 on a smaller type:
1060 type x_t;
1061 TYPE x_T, res0_T, res1_T;
1062 loop:
1063 S1 x_t = *p;
1064 S2 x_T = (TYPE) x_t;
1065 S3 res0_T = op (x_T, C0);
1066 S4 res1_T = op (res0_T, C1);
1067 S5 ... = () res1_T; - type demotion
1069 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1070 constants.
1071 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1072 be 'type' or some intermediate type. For now, we expect S5 to be a type
1073 demotion operation. We also check that S3 and S4 have only one use.
1077 static gimple
1078 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1079 tree *type_in, tree *type_out)
1081 gimple stmt = VEC_pop (gimple, *stmts);
1082 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1083 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1084 imm_use_iterator imm_iter;
1085 use_operand_p use_p;
1086 int nuses = 0;
1087 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1088 bool first;
1089 struct loop *loop = (gimple_bb (stmt))->loop_father;
1091 first = true;
1092 while (1)
1094 if (!vinfo_for_stmt (stmt)
1095 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1096 return NULL;
1098 new_def_stmt = NULL;
1099 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1100 &op0, &op1, &new_def_stmt,
1101 stmts))
1103 if (first)
1104 return NULL;
1105 else
1106 break;
1109 /* STMT can be performed on a smaller type. Check its uses. */
1110 lhs = gimple_assign_lhs (stmt);
1111 nuses = 0;
1112 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1114 if (is_gimple_debug (USE_STMT (use_p)))
1115 continue;
1116 use_stmt = USE_STMT (use_p);
1117 nuses++;
1120 if (nuses != 1 || !is_gimple_assign (use_stmt)
1121 || !gimple_bb (use_stmt)
1122 || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1123 return NULL;
1125 /* Create pattern statement for STMT. */
1126 vectype = get_vectype_for_scalar_type (new_type);
1127 if (!vectype)
1128 return NULL;
1130 /* We want to collect all the statements for which we create pattern
1131 statetments, except for the case when the last statement in the
1132 sequence doesn't have a corresponding pattern statement. In such
1133 case we associate the last pattern statement with the last statement
1134 in the sequence. Therefore, we only add an original statetement to
1135 the list if we know that it is not the last. */
1136 if (prev_stmt)
1137 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1139 var = vect_recog_temp_ssa_var (new_type, NULL);
1140 pattern_stmt = gimple_build_assign_with_ops (
1141 gimple_assign_rhs_code (stmt), var, op0, op1);
1142 SSA_NAME_DEF_STMT (var) = pattern_stmt;
1143 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1144 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
1146 if (vect_print_dump_info (REPORT_DETAILS))
1148 fprintf (vect_dump, "created pattern stmt: ");
1149 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1152 prev_stmt = stmt;
1153 stmt = use_stmt;
1155 first = false;
1158 /* We got a sequence. We expect it to end with a type demotion operation.
1159 Otherwise, we quit (for now). There are three possible cases: the
1160 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1161 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1162 NEW_TYPE differs (we create a new conversion statement). */
1163 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1165 use_lhs = gimple_assign_lhs (use_stmt);
1166 use_type = TREE_TYPE (use_lhs);
1167 /* Support only type promotion or signedess change. */
1168 if (!INTEGRAL_TYPE_P (use_type)
1169 || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1170 return NULL;
1172 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1173 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1175 /* Create NEW_TYPE->USE_TYPE conversion. */
1176 tmp = create_tmp_reg (use_type, NULL);
1177 add_referenced_var (tmp);
1178 new_oprnd = make_ssa_name (tmp, NULL);
1179 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1180 var, NULL_TREE);
1181 SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt;
1182 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1184 *type_in = get_vectype_for_scalar_type (new_type);
1185 *type_out = get_vectype_for_scalar_type (use_type);
1187 /* We created a pattern statement for the last statement in the
1188 sequence, so we don't need to associate it with the pattern
1189 statement created for PREV_STMT. Therefore, we add PREV_STMT
1190 to the list in order to mark it later in vect_pattern_recog_1. */
1191 if (prev_stmt)
1192 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1194 else
1196 if (prev_stmt)
1197 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
1198 = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
1200 *type_in = vectype;
1201 *type_out = NULL_TREE;
1204 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1206 else
1207 /* TODO: support general case, create a conversion to the correct type. */
1208 return NULL;
1210 /* Pattern detected. */
1211 if (vect_print_dump_info (REPORT_DETAILS))
1213 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1214 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1217 return pattern_stmt;
1221 /* Mark statements that are involved in a pattern. */
1223 static inline void
1224 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
1225 tree pattern_vectype)
1227 stmt_vec_info pattern_stmt_info, def_stmt_info;
1228 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1229 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
1230 gimple def_stmt;
1232 set_vinfo_for_stmt (pattern_stmt,
1233 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1234 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1235 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1237 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
1238 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1239 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1240 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1241 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
1242 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
1243 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
1244 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
1245 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
1247 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
1248 set_vinfo_for_stmt (def_stmt,
1249 new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
1250 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
1251 def_stmt_info = vinfo_for_stmt (def_stmt);
1252 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
1253 STMT_VINFO_DEF_TYPE (def_stmt_info)
1254 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1255 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
1259 /* Function vect_pattern_recog_1
1261 Input:
1262 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
1263 computation pattern.
1264 STMT: A stmt from which the pattern search should start.
1266 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
1267 expression that computes the same functionality and can be used to
1268 replace the sequence of stmts that are involved in the pattern.
1270 Output:
1271 This function checks if the expression returned by PATTERN_RECOG_FUNC is
1272 supported in vector form by the target. We use 'TYPE_IN' to obtain the
1273 relevant vector type. If 'TYPE_IN' is already a vector type, then this
1274 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
1275 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
1276 to the available target pattern.
1278 This function also does some bookkeeping, as explained in the documentation
1279 for vect_pattern_recog. */
1281 static void
1282 vect_pattern_recog_1 (
1283 gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
1284 gimple_stmt_iterator si)
1286 gimple stmt = gsi_stmt (si), pattern_stmt;
1287 stmt_vec_info stmt_info;
1288 loop_vec_info loop_vinfo;
1289 tree pattern_vectype;
1290 tree type_in, type_out;
1291 enum tree_code code;
1292 int i;
1293 gimple next;
1294 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1296 VEC_quick_push (gimple, stmts_to_replace, stmt);
1297 pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out);
1298 if (!pattern_stmt)
1299 return;
1301 stmt = VEC_last (gimple, stmts_to_replace);
1302 stmt_info = vinfo_for_stmt (stmt);
1303 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1305 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
1307 /* No need to check target support (already checked by the pattern
1308 recognition function). */
1309 if (type_out)
1310 gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
1311 pattern_vectype = type_out ? type_out : type_in;
1313 else
1315 enum machine_mode vec_mode;
1316 enum insn_code icode;
1317 optab optab;
1319 /* Check target support */
1320 type_in = get_vectype_for_scalar_type (type_in);
1321 if (!type_in)
1322 return;
1323 if (type_out)
1324 type_out = get_vectype_for_scalar_type (type_out);
1325 else
1326 type_out = type_in;
1327 if (!type_out)
1328 return;
1329 pattern_vectype = type_out;
1331 if (is_gimple_assign (pattern_stmt))
1332 code = gimple_assign_rhs_code (pattern_stmt);
1333 else
1335 gcc_assert (is_gimple_call (pattern_stmt));
1336 code = CALL_EXPR;
1339 optab = optab_for_tree_code (code, type_in, optab_default);
1340 vec_mode = TYPE_MODE (type_in);
1341 if (!optab
1342 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
1343 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
1344 return;
1347 /* Found a vectorizable pattern. */
1348 if (vect_print_dump_info (REPORT_DETAILS))
1350 fprintf (vect_dump, "pattern recognized: ");
1351 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1354 /* Mark the stmts that are involved in the pattern. */
1355 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
1357 /* Patterns cannot be vectorized using SLP, because they change the order of
1358 computation. */
1359 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
1360 if (next == stmt)
1361 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
1363 /* It is possible that additional pattern stmts are created and inserted in
1364 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1365 relevant statements. */
1366 for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
1367 && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
1368 i++)
1370 stmt_info = vinfo_for_stmt (stmt);
1371 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1372 if (vect_print_dump_info (REPORT_DETAILS))
1374 fprintf (vect_dump, "additional pattern stmt: ");
1375 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1378 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
1381 VEC_free (gimple, heap, stmts_to_replace);
1385 /* Function vect_pattern_recog
1387 Input:
1388 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
1389 computation idioms.
1391 Output - for each computation idiom that is detected we create a new stmt
1392 that provides the same functionality and that can be vectorized. We
1393 also record some information in the struct_stmt_info of the relevant
1394 stmts, as explained below:
1396 At the entry to this function we have the following stmts, with the
1397 following initial value in the STMT_VINFO fields:
1399 stmt in_pattern_p related_stmt vec_stmt
1400 S1: a_i = .... - - -
1401 S2: a_2 = ..use(a_i).. - - -
1402 S3: a_1 = ..use(a_2).. - - -
1403 S4: a_0 = ..use(a_1).. - - -
1404 S5: ... = ..use(a_0).. - - -
1406 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
1407 represented by a single stmt. We then:
1408 - create a new stmt S6 equivalent to the pattern (the stmt is not
1409 inserted into the code)
1410 - fill in the STMT_VINFO fields as follows:
1412 in_pattern_p related_stmt vec_stmt
1413 S1: a_i = .... - - -
1414 S2: a_2 = ..use(a_i).. - - -
1415 S3: a_1 = ..use(a_2).. - - -
1416 S4: a_0 = ..use(a_1).. true S6 -
1417 '---> S6: a_new = .... - S4 -
1418 S5: ... = ..use(a_0).. - - -
1420 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
1421 to each other through the RELATED_STMT field).
1423 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
1424 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
1425 remain irrelevant unless used by stmts other than S4.
1427 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
1428 (because they are marked as irrelevant). It will vectorize S6, and record
1429 a pointer to the new vector stmt VS6 from S6 (as usual).
1430 S4 will be skipped, and S5 will be vectorized as usual:
1432 in_pattern_p related_stmt vec_stmt
1433 S1: a_i = .... - - -
1434 S2: a_2 = ..use(a_i).. - - -
1435 S3: a_1 = ..use(a_2).. - - -
1436 > VS6: va_new = .... - - -
1437 S4: a_0 = ..use(a_1).. true S6 VS6
1438 '---> S6: a_new = .... - S4 VS6
1439 > VS5: ... = ..vuse(va_new).. - - -
1440 S5: ... = ..use(a_0).. - - -
1442 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
1443 elsewhere), and we'll end up with:
1445 VS6: va_new = ....
1446 VS5: ... = ..vuse(va_new)..
1448 In case of more than one pattern statements, e.g., widen-mult with
1449 intermediate type:
1451 S1 a_t = ;
1452 S2 a_T = (TYPE) a_t;
1453 '--> S3: a_it = (interm_type) a_t;
1454 S4 prod_T = a_T * CONST;
1455 '--> S5: prod_T' = a_it w* CONST;
1457 there may be other users of a_T outside the pattern. In that case S2 will
1458 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
1459 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
1460 be recorded in S3. */
1462 void
1463 vect_pattern_recog (loop_vec_info loop_vinfo)
1465 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1466 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1467 unsigned int nbbs = loop->num_nodes;
1468 gimple_stmt_iterator si;
1469 unsigned int i, j;
1470 gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1472 if (vect_print_dump_info (REPORT_DETAILS))
1473 fprintf (vect_dump, "=== vect_pattern_recog ===");
1475 /* Scan through the loop stmts, applying the pattern recognition
1476 functions starting at each stmt visited: */
1477 for (i = 0; i < nbbs; i++)
1479 basic_block bb = bbs[i];
1480 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1482 /* Scan over all generic vect_recog_xxx_pattern functions. */
1483 for (j = 0; j < NUM_PATTERNS; j++)
1485 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
1486 vect_pattern_recog_1 (vect_recog_func_ptr, si);