/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
                                             tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
                                              tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
                                            tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
                                       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
                                                 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
                                               tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
                                                       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
                                          tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
                                        tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
                                                   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_sad_pattern,
  vect_recog_pow_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_rotate_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_divmod_pattern,
  vect_recog_mult_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for a pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

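/* Editorial illustration (not part of GCC): a minimal C loop of the kind
   this recognizer is meant to match, assuming 16-bit shorts and 32-bit ints
   so that the multiply is a widening one.  Kept under #if 0 so the sketch
   does not participate in the build.  */
#if 0
int
example_dot_prod (short *x, short *y, int n)
{
  int sum = 0;				/* TYPE2 sum = init;  */
  for (int i = 0; i < n; i++)
    sum += (int) x[i] * (int) y[i];	/* the S1..S7 sequence above  */
  return sum;
}
#endif
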
static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

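/* Editorial illustration (not part of GCC): a minimal C SAD loop of the
   kind described above, assuming 8-bit chars promoted to 32-bit int so the
   subtraction happens in a wider signed type.  Kept under #if 0 so the
   sketch does not participate in the build.  */
#if 0
int
example_sad (unsigned char *x, unsigned char *y, int n)
{
  int sum = 0;					/* TYPE2 sum = init;  */
  for (int i = 0; i < n; i++)
    {
      int diff = (int) x[i] - (int) y[i];	/* S3, S4, S5  */
      sum += diff < 0 ? -diff : diff;		/* S6, S8  */
    }
  return sum;
}
#endif
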
static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Handle a widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such an operation in *WSTMT.  */

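/* Editorial illustration (not part of GCC): the intermediate-type case,
   assuming 8-bit chars, 16-bit shorts and 32-bit ints.  A shift count of 9
   exceeds the 8-bit precision of HALF_TYPE, but TYPE is 4 times bigger, so
   the widening can be done from the 16-bit intermediate type instead.  Kept
   under #if 0 so the sketch does not participate in the build.  */
#if 0
unsigned int
example_widen_op_by_const (unsigned char a_t)
{
  unsigned short a_it = (unsigned short) a_t;	/* a_it = (interm_type) a_t;  */
  return (unsigned int) a_it << 9;		/* widening op from interm_type  */
}
#endif
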
static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/

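/* Editorial illustration (not part of GCC): a loop matching the basic
   (signed) case above, assuming 16-bit shorts and 32-bit ints.  Kept under
   #if 0 so the sketch does not participate in the build.  */
#if 0
void
example_widen_mult (int *prod, short *a, short *b, int n)
{
  for (int i = 0; i < n; i++)
    prod[i] = (int) a[i] * (int) b[i];	/* S3, S4, S5 above  */
}
#endif
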
static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/

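/* Editorial illustration (not part of GCC): calls of the shape this
   recognizer rewrites.  Kept under #if 0 so the sketch does not
   participate in the build.  */
#if 0
#include <math.h>
double example_square (double y) { return pow (y, 2.0); }  /* -> y * y  */
double example_root (double y)   { return pow (y, 0.5); }  /* -> sqrt (y)  */
#endif
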
static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

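/* Editorial illustration (not part of GCC): a minimal widening-sum
   reduction, assuming 16-bit shorts summed into a 32-bit accumulator.
   Kept under #if 0 so the sketch does not participate in the build.  */
#if 0
int
example_widen_sum (short *p, int n)
{
  int sum = 0;			/* TYPE sum = init;  */
  for (int i = 0; i < n; i++)
    sum += (int) p[i];		/* S1, S2, S3 above  */
  return sum;
}
#endif
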
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */

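/* Editorial illustration (not part of GCC): C's integer promotions widen
   the char operand to int, but the whole computation fits a narrower type
   because the result is stored back to char.  Kept under #if 0 so the
   sketch does not participate in the build.  */
#if 0
void
example_over_widening (unsigned char *dst, unsigned char *src, int n)
{
  for (int i = 0; i < n; i++)
    /* S2: src[i] promoted to int; S3, S4: shift and OR done in int;
       S5: the store truncates back to char.  */
    dst[i] = (unsigned char) ((src[i] << 2) | 3);
}
#endif
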
static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

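/* Editorial illustration (not part of GCC): a widening shift by a constant,
   assuming 16-bit shorts and 32-bit ints.  Kept under #if 0 so the sketch
   does not participate in the build.  */
#if 0
void
example_widen_shift (int *res, short *a, int n)
{
  for (int i = 0; i < n; i++)
    res[i] = (int) a[i] << 7;	/* S2, S3 above  */
}
#endif
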
static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
	                              type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */

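/* Editorial illustration (not part of GCC): the replacement sequence
   S1-S4,S0 expressed in C for a 32-bit unsigned rotate; negating the count
   and masking with B - 1 keeps the right-shift amount in range even when
   c_t is 0.  Kept under #if 0 so the sketch does not participate in the
   build.  */
#if 0
unsigned int
example_rotate_left (unsigned int b_t, unsigned int c_t)
{
  unsigned int d_t = -c_t;		/* S1  */
  unsigned int e_t = d_t & 31;		/* S2: B - 1 with B == 32  */
  unsigned int f_t = b_t << c_t;	/* S3  */
  unsigned int g_t = b_t >> e_t;	/* S4  */
  return f_t | g_t;			/* S0  */
}
#endif
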
static gimple *
vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && is_a <loop_vec_info> (vinfo))
    {
      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
1967 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1969 if (dump_enabled_p ())
1970 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1972 stmts->safe_push (last_stmt);
1973 return pattern_stmt;
1976 /* Detect a vector by vector shift pattern that wouldn't otherwise
1977 be vectorized:
1979 type a_t;
1980 TYPE b_T, res_T;
1982 S1 a_t = ;
1983 S2 b_T = ;
1984 S3 res_T = b_T op a_t;
1986 where type 'TYPE' is a type with a different size than 'type',
1987 and op is <<, >> or rotate.
1989 Also detect cases:
1991 type a_t;
1992 TYPE b_T, c_T, res_T;
1994 S0 c_T = ;
1995 S1 a_t = (type) c_T;
1996 S2 b_T = ;
1997 S3 res_T = b_T op a_t;
1999 Input/Output:
2001 * STMTS: Contains a stmt from which the pattern search begins,
2002 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2003 with a shift/rotate which has same type on both operands, in the
2004 second case just b_T op c_T, in the first case with added cast
2005 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2007 Output:
2009 * TYPE_IN: The type of the input arguments to the pattern.
2011 * TYPE_OUT: The type of the output of this pattern.
2013 * Return value: A new stmt that will be used to replace the shift/rotate
2014 S3 stmt. */
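/* Editorial sketch, not in the original source: with TYPE == int and
   type == char, the second form above becomes

     S1 a_t = (char) c_T;
     S3 res_T = b_T >> a_t;   -->   res_T = b_T >> c_T;

   i.e. the shift count is taken from c_T directly, while in the first
   form (no suitable cast feeding a_t) a widening cast of a_t to the
   type of b_T is emitted into STMT_VINFO_PATTERN_DEF_SEQ, so that both
   operands get the same vector element size.  */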
2016 static gimple *
2017 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2018 tree *type_in, tree *type_out)
2020 gimple *last_stmt = stmts->pop ();
2021 tree oprnd0, oprnd1, lhs, var;
2022 gimple *pattern_stmt, *def_stmt;
2023 enum tree_code rhs_code;
2024 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2025 vec_info *vinfo = stmt_vinfo->vinfo;
2026 enum vect_def_type dt;
2028 if (!is_gimple_assign (last_stmt))
2029 return NULL;
2031 rhs_code = gimple_assign_rhs_code (last_stmt);
2032 switch (rhs_code)
2034 case LSHIFT_EXPR:
2035 case RSHIFT_EXPR:
2036 case LROTATE_EXPR:
2037 case RROTATE_EXPR:
2038 break;
2039 default:
2040 return NULL;
2043 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2044 return NULL;
2046 lhs = gimple_assign_lhs (last_stmt);
2047 oprnd0 = gimple_assign_rhs1 (last_stmt);
2048 oprnd1 = gimple_assign_rhs2 (last_stmt);
2049 if (TREE_CODE (oprnd0) != SSA_NAME
2050 || TREE_CODE (oprnd1) != SSA_NAME
2051 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2052 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2053 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2054 || TYPE_PRECISION (TREE_TYPE (lhs))
2055 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2056 return NULL;
2058 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2059 return NULL;
2061 if (dt != vect_internal_def)
2062 return NULL;
2064 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2065 *type_out = *type_in;
2066 if (*type_in == NULL_TREE)
2067 return NULL;
2069 tree def = NULL_TREE;
2070 if (gimple_assign_cast_p (def_stmt))
2072 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2073 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2074 && TYPE_PRECISION (TREE_TYPE (rhs1))
2075 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2076 def = rhs1;
2079 if (def == NULL_TREE)
2081 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2082 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2083 new_pattern_def_seq (stmt_vinfo, def_stmt);
2086 /* Pattern detected. */
2087 if (dump_enabled_p ())
2088 dump_printf_loc (MSG_NOTE, vect_location,
2089 "vect_recog_vector_vector_shift_pattern: detected:\n");
2091 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2092 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2093 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2095 if (dump_enabled_p ())
2096 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2098 stmts->safe_push (last_stmt);
2099 return pattern_stmt;
2102 /* Detect multiplication by a constant that is a positive or negative
2103 power of 2, and convert it to a shift pattern.
2105 Mult with a constant that is a positive power of two:
2106 type a_t;
2107 type b_t;
2108 S1: b_t = a_t * n
2112 Mult with a constant that is a negative power of two:
2113 S2: b_t = a_t * -n
2115 Input/Output:
2117 STMTS: Contains a stmt from which the pattern search begins,
2118 i.e. the mult stmt. The mult operation is converted to an LSHIFT
2119 if the constant operand is a positive power of 2:
2120 type a_t, b_t;
2121 S1': b_t = a_t << log2 (n)
2123 The mult operation is converted to an LSHIFT followed by a NEGATE
2124 if the constant operand is a negative power of 2:
2125 type a_t, b_t, res_T;
2126 S2': b_t = a_t << log2 (n)
2127 S3': res_T = - (b_t)
2129 Output:
2131 * TYPE_IN: The type of the input arguments to the pattern.
2133 * TYPE_OUT: The type of the output of this pattern.
2135 * Return value: A new stmt that will be used to replace the multiplication
2136 S1 or S2 stmt. */
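/* Editorial sketch, not in the original source: with n == 8 == 1 << 3,
   the two cases above become

     b_t = a_t * 8;    -->   b_t = a_t << 3;
     b_t = a_t * -8;   -->   t_t = a_t << 3;
                             b_t = -t_t;

   wi::exact_log2 returns 3 for 8 and -1 for non-powers of two, which is
   how the two branches below are selected.  */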
2138 static gimple *
2139 vect_recog_mult_pattern (vec<gimple *> *stmts,
2140 tree *type_in, tree *type_out)
2142 gimple *last_stmt = stmts->pop ();
2143 tree oprnd0, oprnd1, vectype, itype;
2144 gimple *pattern_stmt, *def_stmt;
2145 optab optab;
2146 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2147 int power2_val, power2_neg_val;
2148 tree shift;
2150 if (!is_gimple_assign (last_stmt))
2151 return NULL;
2153 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2154 return NULL;
2156 oprnd0 = gimple_assign_rhs1 (last_stmt);
2157 oprnd1 = gimple_assign_rhs2 (last_stmt);
2158 itype = TREE_TYPE (oprnd0);
2160 if (TREE_CODE (oprnd0) != SSA_NAME
2161 || TREE_CODE (oprnd1) != INTEGER_CST
2162 || !INTEGRAL_TYPE_P (itype)
2163 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2164 return NULL;
2166 vectype = get_vectype_for_scalar_type (itype);
2167 if (vectype == NULL_TREE)
2168 return NULL;
2170 /* If the target can handle vectorized multiplication natively,
2171 don't attempt to optimize this. */
2172 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2173 if (optab != unknown_optab)
2175 machine_mode vec_mode = TYPE_MODE (vectype);
2176 int icode = (int) optab_handler (optab, vec_mode);
2177 if (icode != CODE_FOR_nothing)
2178 return NULL;
2181 /* If target cannot handle vector left shift then we cannot
2182 optimize and bail out. */
2183 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2184 if (!optab
2185 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2186 return NULL;
2188 power2_val = wi::exact_log2 (oprnd1);
2189 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2191 /* Handle constant operands that are positive or negative powers of 2. */
2192 if (power2_val != -1)
2194 shift = build_int_cst (itype, power2_val);
2195 pattern_stmt
2196 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2197 LSHIFT_EXPR, oprnd0, shift);
2199 else if (power2_neg_val != -1)
2201 /* If the target cannot handle vector NEGATE then we cannot
2202 do the optimization. */
2203 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2204 if (!optab
2205 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2206 return NULL;
2208 shift = build_int_cst (itype, power2_neg_val);
2209 def_stmt
2210 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2211 LSHIFT_EXPR, oprnd0, shift);
2212 new_pattern_def_seq (stmt_vinfo, def_stmt);
2213 pattern_stmt
2214 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2215 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2217 else
2218 return NULL;
2220 /* Pattern detected. */
2221 if (dump_enabled_p ())
2222 dump_printf_loc (MSG_NOTE, vect_location,
2223 "vect_recog_mult_pattern: detected:\n");
2225 if (dump_enabled_p ())
2226 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2227 pattern_stmt, 0);
2229 stmts->safe_push (last_stmt);
2230 *type_in = vectype;
2231 *type_out = vectype;
2233 return pattern_stmt;
2236 /* Detect a signed division by a constant that wouldn't
2237 otherwise be vectorized:
2239 type a_t, b_t;
2241 S1 a_t = b_t / N;
2243 where type 'type' is an integral type and N is a constant.
2245 Similarly handle modulo by a constant:
2247 S4 a_t = b_t % N;
2249 Input/Output:
2251 * STMTS: Contains a stmt from which the pattern search begins,
2252 i.e. the division stmt. S1 is replaced, if N is a power of two
2253 constant and the type is signed, by:
2254 S3 y_t = b_t < 0 ? N - 1 : 0;
2255 S2 x_t = b_t + y_t;
2256 S1' a_t = x_t >> log2 (N);
2258 S4 is replaced, if N is a power of two constant and the type is
2259 signed, by the following (where *_T temporaries have unsigned type):
2260 S9 y_T = b_t < 0 ? -1U : 0U;
2261 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2262 S7 z_t = (type) z_T;
2263 S6 w_t = b_t + z_t;
2264 S5 x_t = w_t & (N - 1);
2265 S4' a_t = x_t - z_t;
2267 Output:
2269 * TYPE_IN: The type of the input arguments to the pattern.
2271 * TYPE_OUT: The type of the output of this pattern.
2273 * Return value: A new stmt that will be used to replace the division
2274 S1 or modulo S4 stmt. */
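/* Editorial worked example, not in the original source: for type == int
   and N == 4, S1 "a_t = b_t / 4" expands as described above to

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   so b_t == -7 gives x_t == -4 and a_t == -1, matching C's truncating
   division, whereas a plain arithmetic shift would yield -2.  */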
2276 static gimple *
2277 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2278 tree *type_in, tree *type_out)
2280 gimple *last_stmt = stmts->pop ();
2281 tree oprnd0, oprnd1, vectype, itype, cond;
2282 gimple *pattern_stmt, *def_stmt;
2283 enum tree_code rhs_code;
2284 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2285 vec_info *vinfo = stmt_vinfo->vinfo;
2286 optab optab;
2287 tree q;
2288 int dummy_int, prec;
2289 stmt_vec_info def_stmt_vinfo;
2291 if (!is_gimple_assign (last_stmt))
2292 return NULL;
2294 rhs_code = gimple_assign_rhs_code (last_stmt);
2295 switch (rhs_code)
2297 case TRUNC_DIV_EXPR:
2298 case TRUNC_MOD_EXPR:
2299 break;
2300 default:
2301 return NULL;
2304 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2305 return NULL;
2307 oprnd0 = gimple_assign_rhs1 (last_stmt);
2308 oprnd1 = gimple_assign_rhs2 (last_stmt);
2309 itype = TREE_TYPE (oprnd0);
2310 if (TREE_CODE (oprnd0) != SSA_NAME
2311 || TREE_CODE (oprnd1) != INTEGER_CST
2312 || TREE_CODE (itype) != INTEGER_TYPE
2313 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2314 return NULL;
2316 vectype = get_vectype_for_scalar_type (itype);
2317 if (vectype == NULL_TREE)
2318 return NULL;
2320 /* If the target can handle vectorized division or modulo natively,
2321 don't attempt to optimize this. */
2322 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2323 if (optab != unknown_optab)
2325 machine_mode vec_mode = TYPE_MODE (vectype);
2326 int icode = (int) optab_handler (optab, vec_mode);
2327 if (icode != CODE_FOR_nothing)
2328 return NULL;
2331 prec = TYPE_PRECISION (itype);
2332 if (integer_pow2p (oprnd1))
2334 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2335 return NULL;
2337 /* Pattern detected. */
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE, vect_location,
2340 "vect_recog_divmod_pattern: detected:\n");
2342 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2343 build_int_cst (itype, 0));
2344 if (rhs_code == TRUNC_DIV_EXPR)
2346 tree var = vect_recog_temp_ssa_var (itype, NULL);
2347 tree shift;
2348 def_stmt
2349 = gimple_build_assign (var, COND_EXPR, cond,
2350 fold_build2 (MINUS_EXPR, itype, oprnd1,
2351 build_int_cst (itype, 1)),
2352 build_int_cst (itype, 0));
2353 new_pattern_def_seq (stmt_vinfo, def_stmt);
2354 var = vect_recog_temp_ssa_var (itype, NULL);
2355 def_stmt
2356 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2357 gimple_assign_lhs (def_stmt));
2358 append_pattern_def_seq (stmt_vinfo, def_stmt);
2360 shift = build_int_cst (itype, tree_log2 (oprnd1));
2361 pattern_stmt
2362 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2363 RSHIFT_EXPR, var, shift);
2365 else
2367 tree signmask;
2368 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2369 if (compare_tree_int (oprnd1, 2) == 0)
2371 signmask = vect_recog_temp_ssa_var (itype, NULL);
2372 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2373 build_int_cst (itype, 1),
2374 build_int_cst (itype, 0));
2375 append_pattern_def_seq (stmt_vinfo, def_stmt);
2377 else
2379 tree utype
2380 = build_nonstandard_integer_type (prec, 1);
2381 tree vecutype = get_vectype_for_scalar_type (utype);
2382 tree shift
2383 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2384 - tree_log2 (oprnd1));
2385 tree var = vect_recog_temp_ssa_var (utype, NULL);
2387 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2388 build_int_cst (utype, -1),
2389 build_int_cst (utype, 0));
2390 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2391 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2392 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2393 append_pattern_def_seq (stmt_vinfo, def_stmt);
2394 var = vect_recog_temp_ssa_var (utype, NULL);
2395 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2396 gimple_assign_lhs (def_stmt),
2397 shift);
2398 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2399 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2400 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2401 append_pattern_def_seq (stmt_vinfo, def_stmt);
2402 signmask = vect_recog_temp_ssa_var (itype, NULL);
2403 def_stmt
2404 = gimple_build_assign (signmask, NOP_EXPR, var);
2405 append_pattern_def_seq (stmt_vinfo, def_stmt);
2407 def_stmt
2408 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2409 PLUS_EXPR, oprnd0, signmask);
2410 append_pattern_def_seq (stmt_vinfo, def_stmt);
2411 def_stmt
2412 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2413 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2414 fold_build2 (MINUS_EXPR, itype, oprnd1,
2415 build_int_cst (itype, 1)));
2416 append_pattern_def_seq (stmt_vinfo, def_stmt);
2418 pattern_stmt
2419 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2420 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2421 signmask);
2424 if (dump_enabled_p ())
2425 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2426 0);
2428 stmts->safe_push (last_stmt);
2430 *type_in = vectype;
2431 *type_out = vectype;
2432 return pattern_stmt;
2435 if (prec > HOST_BITS_PER_WIDE_INT
2436 || integer_zerop (oprnd1))
2437 return NULL;
2439 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2440 return NULL;
2442 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2444 if (TYPE_UNSIGNED (itype))
2446 unsigned HOST_WIDE_INT mh, ml;
2447 int pre_shift, post_shift;
2448 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2449 & GET_MODE_MASK (TYPE_MODE (itype)));
2450 tree t1, t2, t3, t4;
2452 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2453 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2454 return NULL;
2456 /* Find a suitable multiplier and right shift count
2457 instead of multiplying with D. */
2458 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2460 /* If the suggested multiplier is more than PREC bits, we can do better
2461 for even divisors, using an initial right shift. */
2462 if (mh != 0 && (d & 1) == 0)
2464 pre_shift = floor_log2 (d & -d);
2465 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2466 &ml, &post_shift, &dummy_int);
2467 gcc_assert (!mh);
2469 else
2470 pre_shift = 0;
2472 if (mh != 0)
2474 if (post_shift - 1 >= prec)
2475 return NULL;
2477 /* t1 = oprnd0 h* ml;
2478 t2 = oprnd0 - t1;
2479 t3 = t2 >> 1;
2480 t4 = t1 + t3;
2481 q = t4 >> (post_shift - 1); */
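/* Editorial note, not in the original source: for prec == 32 and d == 7
   this branch is taken with ml == 0x24924925 and post_shift == 3 (the
   classic divide-by-7 constants), i.e.

     q = (t1 + ((oprnd0 - t1) >> 1)) >> 2,  t1 = oprnd0 h* ml;

   which evaluates floor (oprnd0 / 7) even though the full multiplier
   2^32 + ml does not fit in 32 bits.  */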
2482 t1 = vect_recog_temp_ssa_var (itype, NULL);
2483 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2484 build_int_cst (itype, ml));
2485 append_pattern_def_seq (stmt_vinfo, def_stmt);
2487 t2 = vect_recog_temp_ssa_var (itype, NULL);
2488 def_stmt
2489 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2490 append_pattern_def_seq (stmt_vinfo, def_stmt);
2492 t3 = vect_recog_temp_ssa_var (itype, NULL);
2493 def_stmt
2494 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2495 append_pattern_def_seq (stmt_vinfo, def_stmt);
2497 t4 = vect_recog_temp_ssa_var (itype, NULL);
2498 def_stmt
2499 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2501 if (post_shift != 1)
2503 append_pattern_def_seq (stmt_vinfo, def_stmt);
2505 q = vect_recog_temp_ssa_var (itype, NULL);
2506 pattern_stmt
2507 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2508 build_int_cst (itype, post_shift - 1));
2510 else
2512 q = t4;
2513 pattern_stmt = def_stmt;
2516 else
2518 if (pre_shift >= prec || post_shift >= prec)
2519 return NULL;
2521 /* t1 = oprnd0 >> pre_shift;
2522 t2 = t1 h* ml;
2523 q = t2 >> post_shift; */
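/* Editorial note, not in the original source: for prec == 32 and d == 3,
   choose_multiplier yields ml == 0xAAAAAAAB with pre_shift == 0 and
   post_shift == 1, so this branch emits just

     t2 = oprnd0 h* 0xAAAAAAAB;
     q  = t2 >> 1;

   e.g. oprnd0 == 9 gives t2 == 6 and q == 3.  */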
2524 if (pre_shift)
2526 t1 = vect_recog_temp_ssa_var (itype, NULL);
2527 def_stmt
2528 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2529 build_int_cst (NULL, pre_shift));
2530 append_pattern_def_seq (stmt_vinfo, def_stmt);
2532 else
2533 t1 = oprnd0;
2535 t2 = vect_recog_temp_ssa_var (itype, NULL);
2536 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2537 build_int_cst (itype, ml));
2539 if (post_shift)
2541 append_pattern_def_seq (stmt_vinfo, def_stmt);
2543 q = vect_recog_temp_ssa_var (itype, NULL);
2544 def_stmt
2545 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2546 build_int_cst (itype, post_shift));
2548 else
2549 q = t2;
2551 pattern_stmt = def_stmt;
2554 else
2556 unsigned HOST_WIDE_INT ml;
2557 int post_shift;
2558 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2559 unsigned HOST_WIDE_INT abs_d;
2560 bool add = false;
2561 tree t1, t2, t3, t4;
2563 /* Give up for -1. */
2564 if (d == -1)
2565 return NULL;
2567 /* Since d might be INT_MIN, we have to cast to
2568 unsigned HOST_WIDE_INT before negating to avoid
2569 undefined signed overflow. */
2570 abs_d = (d >= 0
2571 ? (unsigned HOST_WIDE_INT) d
2572 : - (unsigned HOST_WIDE_INT) d);
2574 /* n rem d = n rem -d */
2575 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2577 d = abs_d;
2578 oprnd1 = build_int_cst (itype, abs_d);
2580 else if (HOST_BITS_PER_WIDE_INT >= prec
2581 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2582 /* This case is not handled correctly below. */
2583 return NULL;
2585 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2586 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2588 add = true;
2589 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2591 if (post_shift >= prec)
2592 return NULL;
2594 /* t1 = oprnd0 h* ml; */
2595 t1 = vect_recog_temp_ssa_var (itype, NULL);
2596 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2597 build_int_cst (itype, ml));
2599 if (add)
2601 /* t2 = t1 + oprnd0; */
2602 append_pattern_def_seq (stmt_vinfo, def_stmt);
2603 t2 = vect_recog_temp_ssa_var (itype, NULL);
2604 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2606 else
2607 t2 = t1;
2609 if (post_shift)
2611 /* t3 = t2 >> post_shift; */
2612 append_pattern_def_seq (stmt_vinfo, def_stmt);
2613 t3 = vect_recog_temp_ssa_var (itype, NULL);
2614 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2615 build_int_cst (itype, post_shift));
2617 else
2618 t3 = t2;
2620 wide_int oprnd0_min, oprnd0_max;
2621 int msb = 1;
2622 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2624 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2625 msb = 0;
2626 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2627 msb = -1;
2630 if (msb == 0 && d >= 0)
2632 /* q = t3; */
2633 q = t3;
2634 pattern_stmt = def_stmt;
2636 else
2638 /* t4 = oprnd0 >> (prec - 1);
2639 or if we know from VRP that oprnd0 >= 0
2640 t4 = 0;
2641 or if we know from VRP that oprnd0 < 0
2642 t4 = -1; */
2643 append_pattern_def_seq (stmt_vinfo, def_stmt);
2644 t4 = vect_recog_temp_ssa_var (itype, NULL);
2645 if (msb != 1)
2646 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2647 build_int_cst (itype, msb));
2648 else
2649 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2650 build_int_cst (itype, prec - 1));
2651 append_pattern_def_seq (stmt_vinfo, def_stmt);
2653 /* q = t3 - t4; or q = t4 - t3; */
2654 q = vect_recog_temp_ssa_var (itype, NULL);
2655 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2656 d < 0 ? t3 : t4);
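/* Editorial worked example, not in the original source: for prec == 32
   and d == 3, choose_multiplier yields ml == 0x55555556 with
   post_shift == 0 and add == false, so the sequence above reduces to

     t1 = oprnd0 h* 0x55555556;
     t4 = oprnd0 >> 31;
     q  = t1 - t4;

   e.g. oprnd0 == -7 gives t1 == -3, t4 == -1 and q == -2, the correct
   truncated quotient.  */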
2660 if (rhs_code == TRUNC_MOD_EXPR)
2662 tree r, t1;
2664 /* We divided. Now finish by:
2665 t1 = q * oprnd1;
2666 r = oprnd0 - t1; */
2667 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2669 t1 = vect_recog_temp_ssa_var (itype, NULL);
2670 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2671 append_pattern_def_seq (stmt_vinfo, def_stmt);
2673 r = vect_recog_temp_ssa_var (itype, NULL);
2674 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2677 /* Pattern detected. */
2678 if (dump_enabled_p ())
2680 dump_printf_loc (MSG_NOTE, vect_location,
2681 "vect_recog_divmod_pattern: detected: ");
2682 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2683 dump_printf (MSG_NOTE, "\n");
2686 stmts->safe_push (last_stmt);
2688 *type_in = vectype;
2689 *type_out = vectype;
2690 return pattern_stmt;
2693 /* Function vect_recog_mixed_size_cond_pattern
2695 Try to find the following pattern:
2697 type x_t, y_t;
2698 TYPE a_T, b_T, c_T;
2699 loop:
2700 S1 a_T = x_t CMP y_t ? b_T : c_T;
2702 where type 'TYPE' is an integral type which has a different size
2703 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2704 than 'type', the constants need to fit into an integer type
2705 with the same width as 'type') or results of conversion from 'type'.
2707 Input:
2709 * LAST_STMT: A stmt from which the pattern search begins.
2711 Output:
2713 * TYPE_IN: The type of the input arguments to the pattern.
2715 * TYPE_OUT: The type of the output of this pattern.
2717 * Return value: A new stmt that will be used to replace the pattern.
2718 Additionally a def_stmt is added:
2720 a_it = x_t CMP y_t ? b_it : c_it;
2721 a_T = (TYPE) a_it; */
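/* Editorial sketch, not in the original source: with x_t, y_t of type
   char and TYPE == int, S1 "a_T = x_t < y_t ? 3 : -1" is rewritten as

     a_it = x_t < y_t ? 3 : -1;   // COND_EXPR in the 8-bit itype
     a_T = (TYPE) a_it;           // widening cast, the pattern stmt

   so the VEC_COND_EXPR selects on elements of the comparison's width; a
   then_clause such as 70000 would make the pattern fail the
   int_fits_type_p check below, since it does not fit in the 8-bit
   itype.  */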
2723 static gimple *
2724 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2725 tree *type_out)
2727 gimple *last_stmt = (*stmts)[0];
2728 tree cond_expr, then_clause, else_clause;
2729 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2730 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2731 gimple *pattern_stmt, *def_stmt;
2732 vec_info *vinfo = stmt_vinfo->vinfo;
2733 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2734 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2735 bool promotion;
2736 tree comp_scalar_type;
2738 if (!is_gimple_assign (last_stmt)
2739 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2740 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2741 return NULL;
2743 cond_expr = gimple_assign_rhs1 (last_stmt);
2744 then_clause = gimple_assign_rhs2 (last_stmt);
2745 else_clause = gimple_assign_rhs3 (last_stmt);
2747 if (!COMPARISON_CLASS_P (cond_expr))
2748 return NULL;
2750 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2751 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2752 if (comp_vectype == NULL_TREE)
2753 return NULL;
2755 type = gimple_expr_type (last_stmt);
2756 if (types_compatible_p (type, comp_scalar_type)
2757 || ((TREE_CODE (then_clause) != INTEGER_CST
2758 || TREE_CODE (else_clause) != INTEGER_CST)
2759 && !INTEGRAL_TYPE_P (comp_scalar_type))
2760 || !INTEGRAL_TYPE_P (type))
2761 return NULL;
2763 if ((TREE_CODE (then_clause) != INTEGER_CST
2764 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2765 &def_stmt0, &promotion))
2766 || (TREE_CODE (else_clause) != INTEGER_CST
2767 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2768 &def_stmt1, &promotion)))
2769 return NULL;
2771 if (orig_type0 && orig_type1
2772 && !types_compatible_p (orig_type0, orig_type1))
2773 return NULL;
2775 if (orig_type0)
2777 if (!types_compatible_p (orig_type0, comp_scalar_type))
2778 return NULL;
2779 then_clause = gimple_assign_rhs1 (def_stmt0);
2780 itype = orig_type0;
2783 if (orig_type1)
2785 if (!types_compatible_p (orig_type1, comp_scalar_type))
2786 return NULL;
2787 else_clause = gimple_assign_rhs1 (def_stmt1);
2788 itype = orig_type1;
2792 HOST_WIDE_INT cmp_mode_size
2793 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2795 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2796 return NULL;
2798 vectype = get_vectype_for_scalar_type (type);
2799 if (vectype == NULL_TREE)
2800 return NULL;
2802 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2803 return NULL;
2805 if (itype == NULL_TREE)
2806 itype = build_nonstandard_integer_type (cmp_mode_size,
2807 TYPE_UNSIGNED (type));
2809 if (itype == NULL_TREE
2810 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2811 return NULL;
2813 vecitype = get_vectype_for_scalar_type (itype);
2814 if (vecitype == NULL_TREE)
2815 return NULL;
2817 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2818 return NULL;
2820 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2822 if ((TREE_CODE (then_clause) == INTEGER_CST
2823 && !int_fits_type_p (then_clause, itype))
2824 || (TREE_CODE (else_clause) == INTEGER_CST
2825 && !int_fits_type_p (else_clause, itype)))
2826 return NULL;
2829 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2830 COND_EXPR, unshare_expr (cond_expr),
2831 fold_convert (itype, then_clause),
2832 fold_convert (itype, else_clause));
2833 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2834 NOP_EXPR, gimple_assign_lhs (def_stmt));
2836 new_pattern_def_seq (stmt_vinfo, def_stmt);
2837 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2838 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2839 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2840 *type_in = vecitype;
2841 *type_out = vectype;
2843 if (dump_enabled_p ())
2844 dump_printf_loc (MSG_NOTE, vect_location,
2845 "vect_recog_mixed_size_cond_pattern: detected:\n");
2847 return pattern_stmt;
2851 /* Helper function of vect_recog_bool_pattern. Called recursively, it
2852 returns true if bool VAR can be optimized that way. */
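/* Editorial note, not in the original source: a VAR defined by, e.g.,

     a_b = x_1 < y_1;
     c_b = a_b & (z_1 != 0);

   is accepted (comparisons, provided the corresponding VEC_COND_EXPR is
   expandable, combined by bitwise ops), while a bool loaded from memory
   or defined by a PHI is rejected by the checks below.  */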
2854 static bool
2855 check_bool_pattern (tree var, vec_info *vinfo)
2857 gimple *def_stmt;
2858 enum vect_def_type dt;
2859 tree rhs1;
2860 enum tree_code rhs_code;
2862 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2863 return false;
2865 if (dt != vect_internal_def)
2866 return false;
2868 if (!is_gimple_assign (def_stmt))
2869 return false;
2871 if (!has_single_use (var))
2872 return false;
2874 rhs1 = gimple_assign_rhs1 (def_stmt);
2875 rhs_code = gimple_assign_rhs_code (def_stmt);
2876 switch (rhs_code)
2878 case SSA_NAME:
2879 return check_bool_pattern (rhs1, vinfo);
2881 CASE_CONVERT:
2882 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2883 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2884 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2885 return false;
2886 return check_bool_pattern (rhs1, vinfo);
2888 case BIT_NOT_EXPR:
2889 return check_bool_pattern (rhs1, vinfo);
2891 case BIT_AND_EXPR:
2892 case BIT_IOR_EXPR:
2893 case BIT_XOR_EXPR:
2894 if (!check_bool_pattern (rhs1, vinfo))
2895 return false;
2896 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2898 default:
2899 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2901 tree vecitype, comp_vectype;
2903 /* If the comparison can throw, then is_gimple_condexpr will be
2904 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2905 if (stmt_could_throw_p (def_stmt))
2906 return false;
2908 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2909 if (comp_vectype == NULL_TREE)
2910 return false;
2912 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2914 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2915 tree itype
2916 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2917 vecitype = get_vectype_for_scalar_type (itype);
2918 if (vecitype == NULL_TREE)
2919 return false;
2921 else
2922 vecitype = comp_vectype;
2923 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2925 return false;
2930 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2931 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2932 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2934 static tree
2935 adjust_bool_pattern_cast (tree type, tree var)
2937 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2938 gimple *cast_stmt, *pattern_stmt;
2940 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2941 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2942 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2943 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2944 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2945 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2946 return gimple_assign_lhs (cast_stmt);
2950 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2951 recursively. VAR is an SSA_NAME that should be transformed from bool
2952 to a wider integer type, OUT_TYPE is the desired final integer type of
2953 the whole pattern, TRUEVAL should be NULL unless optimizing
2954 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2955 in the then_clause, STMTS is where statements with added pattern stmts
2956 should be pushed to. */
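/* Editorial note, not in the original source: in the and_ior_xor case
   below, when the transformed operands disagree in precision, the one
   whose precision is further from OUT_TYPE's is cast over: for
   out_type == int, an 8-bit irhs1 (from a char comparison) and a 32-bit
   irhs2 give |32 - 8| > |32 - 32|, so irhs1 is widened to 32 bits
   before the bitwise stmt is built.  */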
2958 static tree
2959 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2960 vec<gimple *> *stmts)
2962 gimple *stmt = SSA_NAME_DEF_STMT (var);
2963 enum tree_code rhs_code, def_rhs_code;
2964 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2965 location_t loc;
2966 gimple *pattern_stmt, *def_stmt;
2968 rhs1 = gimple_assign_rhs1 (stmt);
2969 rhs2 = gimple_assign_rhs2 (stmt);
2970 rhs_code = gimple_assign_rhs_code (stmt);
2971 loc = gimple_location (stmt);
2972 switch (rhs_code)
2974 case SSA_NAME:
2975 CASE_CONVERT:
2976 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2977 itype = TREE_TYPE (irhs1);
2978 pattern_stmt
2979 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2980 SSA_NAME, irhs1);
2981 break;
2983 case BIT_NOT_EXPR:
2984 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2985 itype = TREE_TYPE (irhs1);
2986 pattern_stmt
2987 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2988 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2989 break;
2991 case BIT_AND_EXPR:
2992 /* Try to optimize x = y & (a < b ? 1 : 0); into
2993 x = (a < b ? y : 0);
2995 E.g. for:
2996 bool a_b, b_b, c_b;
2997 TYPE d_T;
2999 S1 a_b = x1 CMP1 y1;
3000 S2 b_b = x2 CMP2 y2;
3001 S3 c_b = a_b & b_b;
3002 S4 d_T = (TYPE) c_b;
3004 we would normally emit:
3006 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3007 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3008 S3' c_T = a_T & b_T;
3009 S4' d_T = c_T;
3011 but we can save one stmt by using the
3012 result of one of the COND_EXPRs in the other COND_EXPR and leave
3013 BIT_AND_EXPR stmt out:
3015 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3016 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3017 S4' f_T = c_T;
3019 At least when VEC_COND_EXPR is implemented using masks,
3020 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3021 computes the comparison mask and ANDs it, in one case with an
3022 all-ones vector, in the other case with a vector register.
3023 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3024 often more expensive. */
3025 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3026 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3027 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3029 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3030 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3031 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3032 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3034 gimple *tstmt;
3035 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3036 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3037 tstmt = stmts->pop ();
3038 gcc_assert (tstmt == def_stmt);
3039 stmts->quick_push (stmt);
3040 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3041 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3042 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3043 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3044 return irhs2;
3046 else
3047 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3048 goto and_ior_xor;
3050 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3051 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3052 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3054 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3055 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3056 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3057 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3059 gimple *tstmt;
3060 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3061 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3062 tstmt = stmts->pop ();
3063 gcc_assert (tstmt == def_stmt);
3064 stmts->quick_push (stmt);
3065 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3066 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3067 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3068 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3069 return irhs1;
3071 else
3072 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3073 goto and_ior_xor;
3075 /* FALLTHRU */
3076 case BIT_IOR_EXPR:
3077 case BIT_XOR_EXPR:
3078 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3079 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3080 and_ior_xor:
3081 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3082 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3084 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3085 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3086 int out_prec = TYPE_PRECISION (out_type);
3087 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3088 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3089 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3090 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3091 else
3093 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3094 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3097 itype = TREE_TYPE (irhs1);
3098 pattern_stmt
3099 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3100 rhs_code, irhs1, irhs2);
3101 break;
3103 default:
3104 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3105 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3106 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3107 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3108 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3110 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3111 itype
3112 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3114 else
3115 itype = TREE_TYPE (rhs1);
3116 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3117 if (trueval == NULL_TREE)
3118 trueval = build_int_cst (itype, 1);
3119 else
3120 gcc_checking_assert (useless_type_conversion_p (itype,
3121 TREE_TYPE (trueval)));
3122 pattern_stmt
3123 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3124 COND_EXPR, cond_expr, trueval,
3125 build_int_cst (itype, 0));
3126 break;
3129 stmts->safe_push (stmt);
3130 gimple_set_location (pattern_stmt, loc);
3131 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3132 return gimple_assign_lhs (pattern_stmt);
3136 /* Function vect_recog_bool_pattern
3138 Try to find pattern like following:
3140 bool a_b, b_b, c_b, d_b, e_b;
3141 TYPE f_T;
3142 loop:
3143 S1 a_b = x1 CMP1 y1;
3144 S2 b_b = x2 CMP2 y2;
3145 S3 c_b = a_b & b_b;
3146 S4 d_b = x3 CMP3 y3;
3147 S5 e_b = c_b | d_b;
3148 S6 f_T = (TYPE) e_b;
3150 where type 'TYPE' is an integral type. Or a similar pattern
3151 ending in
3153 S6 f_Y = e_b ? r_Y : s_Y;
3155 as results from if-conversion of a complex condition.
3157 Input:
3159 * LAST_STMT: A stmt at the end from which the pattern
3160 search begins, i.e. cast of a bool to
3161 an integer type.
3163 Output:
3165 * TYPE_IN: The type of the input arguments to the pattern.
3167 * TYPE_OUT: The type of the output of this pattern.
3169 * Return value: A new stmt that will be used to replace the pattern.
3171 Assuming the size of TYPE is the same as the size of all comparisons
3172 (otherwise some casts would be added where needed), for the above
3173 sequence we create related pattern stmts:
3174 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3175 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3176 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3177 S5' e_T = c_T | d_T;
3178 S6' f_T = e_T;
3180 Instead of the above S3' we could emit:
3181 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3182 S3' c_T = a_T | b_T;
3183 but the above is more efficient. */
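/* Editorial sketch, not in the original source: C code shaped like

     _Bool a = x[i] < 4;
     _Bool b = y[i] > 2;
     _Bool c = a & b;
     f[i] = (int) c;   // S6: cast of a bool to an integer type

   gimplifies to the S1..S6 form above, and this pattern rewrites the
   bool chain into COND_EXPRs on integer types as shown.  */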
3185 static gimple *
3186 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3187 tree *type_out)
3189 gimple *last_stmt = stmts->pop ();
3190 enum tree_code rhs_code;
3191 tree var, lhs, rhs, vectype;
3192 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3193 vec_info *vinfo = stmt_vinfo->vinfo;
3194 gimple *pattern_stmt;
3196 if (!is_gimple_assign (last_stmt))
3197 return NULL;
3199 var = gimple_assign_rhs1 (last_stmt);
3200 lhs = gimple_assign_lhs (last_stmt);
3202 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3203 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3204 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3205 return NULL;
3207 rhs_code = gimple_assign_rhs_code (last_stmt);
3208 if (CONVERT_EXPR_CODE_P (rhs_code))
3210 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3211 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3212 return NULL;
3213 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3214 if (vectype == NULL_TREE)
3215 return NULL;
3217 if (!check_bool_pattern (var, vinfo))
3218 return NULL;
3220 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3221 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3222 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3223 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3224 else
3225 pattern_stmt
3226 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3227 *type_out = vectype;
3228 *type_in = vectype;
3229 stmts->safe_push (last_stmt);
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_NOTE, vect_location,
3232 "vect_recog_bool_pattern: detected:\n");
3234 return pattern_stmt;
3236 else if (rhs_code == COND_EXPR
3237 && TREE_CODE (var) == SSA_NAME)
3239 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3240 if (vectype == NULL_TREE)
3241 return NULL;
3243 /* Build a scalar type for the boolean result that when
3244 vectorized matches the vector type of the result in
3245 size and number of elements. */
3246 unsigned prec
3247 = wi::udiv_trunc (TYPE_SIZE (vectype),
3248 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3249 tree type
3250 = build_nonstandard_integer_type (prec,
3251 TYPE_UNSIGNED (TREE_TYPE (var)));
3252 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3253 return NULL;
3255 if (!check_bool_pattern (var, vinfo))
3256 return NULL;
3258 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3259 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3260 pattern_stmt
3261 = gimple_build_assign (lhs, COND_EXPR,
3262 build2 (NE_EXPR, boolean_type_node,
3263 rhs, build_int_cst (type, 0)),
3264 gimple_assign_rhs2 (last_stmt),
3265 gimple_assign_rhs3 (last_stmt));
3266 *type_out = vectype;
3267 *type_in = vectype;
3268 stmts->safe_push (last_stmt);
3269 if (dump_enabled_p ())
3270 dump_printf_loc (MSG_NOTE, vect_location,
3271 "vect_recog_bool_pattern: detected:\n");
3273 return pattern_stmt;
3275 else if (rhs_code == SSA_NAME
3276 && STMT_VINFO_DATA_REF (stmt_vinfo))
3278 stmt_vec_info pattern_stmt_info;
3279 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3280 gcc_assert (vectype != NULL_TREE);
3281 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3282 return NULL;
3283 if (!check_bool_pattern (var, vinfo))
3284 return NULL;
3286 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3287 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3288 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3290 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3291 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3292 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3293 rhs = rhs2;
3295 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3296 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3297 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3298 STMT_VINFO_DATA_REF (pattern_stmt_info)
3299 = STMT_VINFO_DATA_REF (stmt_vinfo);
3300 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3301 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3302 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3303 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3304 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3305 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3306 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3307 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3308 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3309 *type_out = vectype;
3310 *type_in = vectype;
3311 stmts->safe_push (last_stmt);
3312 if (dump_enabled_p ())
3313 dump_printf_loc (MSG_NOTE, vect_location,
3314 "vect_recog_bool_pattern: detected:\n");
3315 return pattern_stmt;
3317 else
3318 return NULL;
3322 /* Mark statements that are involved in a pattern. */
3324 static inline void
3325 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
3326 tree pattern_vectype)
3328 stmt_vec_info pattern_stmt_info, def_stmt_info;
3329 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3330 vec_info *vinfo = orig_stmt_info->vinfo;
3331 gimple *def_stmt;
3333 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3334 if (pattern_stmt_info == NULL)
3336 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3337 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3339 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3341 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3342 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3343 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3344 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3345 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3346 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3347 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3348 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3349 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3351 gimple_stmt_iterator si;
3352 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3353 !gsi_end_p (si); gsi_next (&si))
3355 def_stmt = gsi_stmt (si);
3356 def_stmt_info = vinfo_for_stmt (def_stmt);
3357 if (def_stmt_info == NULL)
3359 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3360 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3362 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3363 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3364 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3365 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3366 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3371 /* Function vect_pattern_recog_1
3373 Input:
3374 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3375 computation pattern.
3376 STMT: A stmt from which the pattern search should start.
3378 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3379 expression that computes the same functionality and can be used to
3380 replace the sequence of stmts that are involved in the pattern.
3382 Output:
3383 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3384 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3385 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3386 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3387 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
3388 fits the available target pattern.
3390 This function also does some bookkeeping, as explained in the documentation
3391 for vect_pattern_recog. */
3393 static void
3394 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3395 gimple_stmt_iterator si,
3396 vec<gimple *> *stmts_to_replace)
3398 gimple *stmt = gsi_stmt (si), *pattern_stmt;
3399 stmt_vec_info stmt_info;
3400 loop_vec_info loop_vinfo;
3401 tree pattern_vectype;
3402 tree type_in, type_out;
3403 enum tree_code code;
3404 int i;
3405 gimple *next;
3407 stmts_to_replace->truncate (0);
3408 stmts_to_replace->quick_push (stmt);
3409 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3410 if (!pattern_stmt)
3411 return;
3413 stmt = stmts_to_replace->last ();
3414 stmt_info = vinfo_for_stmt (stmt);
3415 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3417 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3419 /* No need to check target support (already checked by the pattern
3420 recognition function). */
3421 pattern_vectype = type_out ? type_out : type_in;
3423 else
3425 machine_mode vec_mode;
3426 enum insn_code icode;
3427 optab optab;
3429 /* Check target support */
3430 type_in = get_vectype_for_scalar_type (type_in);
3431 if (!type_in)
3432 return;
3433 if (type_out)
3434 type_out = get_vectype_for_scalar_type (type_out);
3435 else
3436 type_out = type_in;
3437 if (!type_out)
3438 return;
3439 pattern_vectype = type_out;
3441 if (is_gimple_assign (pattern_stmt))
3442 code = gimple_assign_rhs_code (pattern_stmt);
3443 else
3445 gcc_assert (is_gimple_call (pattern_stmt));
3446 code = CALL_EXPR;
3449 optab = optab_for_tree_code (code, type_in, optab_default);
3450 vec_mode = TYPE_MODE (type_in);
3451 if (!optab
3452 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3453 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3454 return;
3457 /* Found a vectorizable pattern. */
3458 if (dump_enabled_p ())
3460 dump_printf_loc (MSG_NOTE, vect_location,
3461 "pattern recognized: ");
3462 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3465 /* Mark the stmts that are involved in the pattern. */
3466 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3468 /* Patterns cannot be vectorized using SLP, because they change the order of
3469 computation. */
3470 if (loop_vinfo)
3471 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3472 if (next == stmt)
3473 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3475 /* It is possible that additional pattern stmts are created and inserted in
3476 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3477 relevant statements. */
3478 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3479 && (unsigned) i < (stmts_to_replace->length () - 1);
3480 i++)
3482 stmt_info = vinfo_for_stmt (stmt);
3483 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3484 if (dump_enabled_p ())
3486 dump_printf_loc (MSG_NOTE, vect_location,
3487 "additional pattern stmt: ");
3488 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3491 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3496 /* Function vect_pattern_recog
3498 Input:
3499 VINFO - the loop or basic-block vectorization info of the region
3500 in which we want to look for computation idioms.
3502 Output - for each computation idiom that is detected we create a new stmt
3503 that provides the same functionality and that can be vectorized. We
3504 also record some information in the stmt_vec_info of the relevant
3505 stmts, as explained below:
3507 At the entry to this function we have the following stmts, with the
3508 following initial value in the STMT_VINFO fields:
3510 stmt in_pattern_p related_stmt vec_stmt
3511 S1: a_i = .... - - -
3512 S2: a_2 = ..use(a_i).. - - -
3513 S3: a_1 = ..use(a_2).. - - -
3514 S4: a_0 = ..use(a_1).. - - -
3515 S5: ... = ..use(a_0).. - - -
3517 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3518 represented by a single stmt. We then:
3519 - create a new stmt S6 equivalent to the pattern (the stmt is not
3520 inserted into the code)
3521 - fill in the STMT_VINFO fields as follows:
3523 in_pattern_p related_stmt vec_stmt
3524 S1: a_i = .... - - -
3525 S2: a_2 = ..use(a_i).. - - -
3526 S3: a_1 = ..use(a_2).. - - -
3527 S4: a_0 = ..use(a_1).. true S6 -
3528 '---> S6: a_new = .... - S4 -
3529 S5: ... = ..use(a_0).. - - -
3531 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3532 to each other through the RELATED_STMT field).
3534 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3535 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3536 remain irrelevant unless used by stmts other than S4.
3538 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3539 (because they are marked as irrelevant). It will vectorize S6, and record
3540 a pointer to the new vector stmt VS6 from S6 (as usual).
3541 S4 will be skipped, and S5 will be vectorized as usual:
3543 in_pattern_p related_stmt vec_stmt
3544 S1: a_i = .... - - -
3545 S2: a_2 = ..use(a_i).. - - -
3546 S3: a_1 = ..use(a_2).. - - -
3547 > VS6: va_new = .... - - -
3548 S4: a_0 = ..use(a_1).. true S6 VS6
3549 '---> S6: a_new = .... - S4 VS6
3550 > VS5: ... = ..vuse(va_new).. - - -
3551 S5: ... = ..use(a_0).. - - -
3553 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3554 elsewhere), and we'll end up with:
3556 VS6: va_new = ....
3557 VS5: ... = ..vuse(va_new)..
3559 In case of more than one pattern statements, e.g., widen-mult with
3560 intermediate type:
3562 S1 a_t = ;
3563 S2 a_T = (TYPE) a_t;
3564 '--> S3: a_it = (interm_type) a_t;
3565 S4 prod_T = a_T * CONST;
3566 '--> S5: prod_T' = a_it w* CONST;
3568 there may be other users of a_T outside the pattern. In that case S2 will
3569 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3570 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3571 be recorded in S3. */
3573 void
3574 vect_pattern_recog (vec_info *vinfo)
3576 struct loop *loop;
3577 basic_block *bbs;
3578 unsigned int nbbs;
3579 gimple_stmt_iterator si;
3580 unsigned int i, j;
3581 vect_recog_func_ptr vect_recog_func;
3582 auto_vec<gimple *, 1> stmts_to_replace;
3583 gimple *stmt;
3585 if (dump_enabled_p ())
3586 dump_printf_loc (MSG_NOTE, vect_location,
3587 "=== vect_pattern_recog ===\n");
3589 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
3591 loop = LOOP_VINFO_LOOP (loop_vinfo);
3592 bbs = LOOP_VINFO_BBS (loop_vinfo);
3593 nbbs = loop->num_nodes;
3595 /* Scan through the loop stmts, applying the pattern recognition
3596 functions starting at each stmt visited: */
3597 for (i = 0; i < nbbs; i++)
3599 basic_block bb = bbs[i];
3600 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3602 /* Scan over all generic vect_recog_xxx_pattern functions. */
3603 for (j = 0; j < NUM_PATTERNS; j++)
3605 vect_recog_func = vect_vect_recog_func_ptrs[j];
3606 vect_pattern_recog_1 (vect_recog_func, si,
3607 &stmts_to_replace);
3612 else
3614 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
3615 for (si = bb_vinfo->region_begin;
3616 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
3618 if ((stmt = gsi_stmt (si))
3619 && vinfo_for_stmt (stmt)
3620 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3621 continue;
3623 /* Scan over all generic vect_recog_xxx_pattern functions. */
3624 for (j = 0; j < NUM_PATTERNS; j++)
3626 vect_recog_func = vect_vect_recog_func_ptrs[j];
3627 vect_pattern_recog_1 (vect_recog_func, si,
3628 &stmts_to_replace);