/* gcc/tree-vect-patterns.c — from official-gcc.git,
   blob f034635da8607dfa3fe863a1a243a2fc7883af62.  */
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "rtl.h"
28 #include "ssa.h"
29 #include "alias.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
32 #include "target.h"
33 #include "dominance.h"
34 #include "gimple-pretty-print.h"
35 #include "internal-fn.h"
36 #include "tree-eh.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "flags.h"
41 #include "insn-config.h"
42 #include "expmed.h"
43 #include "dojump.h"
44 #include "explow.h"
45 #include "calls.h"
46 #include "emit-rtl.h"
47 #include "varasm.h"
48 #include "stmt.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "params.h"
53 #include "tree-data-ref.h"
54 #include "tree-vectorizer.h"
55 #include "recog.h" /* FIXME: for insn_data */
56 #include "diagnostic-core.h"
57 #include "dumpfile.h"
58 #include "builtins.h"
60 /* Pattern recognition functions */
61 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
62 tree *);
63 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
64 tree *);
65 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
66 tree *);
67 static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
68 tree *);
69 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
70 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
71 tree *);
72 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
73 tree *, tree *);
74 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
75 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
76 tree *, tree *);
77 static gimple vect_recog_divmod_pattern (vec<gimple> *,
78 tree *, tree *);
79 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
80 tree *, tree *);
81 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
82 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
83 vect_recog_widen_mult_pattern,
84 vect_recog_widen_sum_pattern,
85 vect_recog_dot_prod_pattern,
86 vect_recog_sad_pattern,
87 vect_recog_pow_pattern,
88 vect_recog_widen_shift_pattern,
89 vect_recog_over_widening_pattern,
90 vect_recog_rotate_pattern,
91 vect_recog_vector_vector_shift_pattern,
92 vect_recog_divmod_pattern,
93 vect_recog_mixed_size_cond_pattern,
94 vect_recog_bool_pattern};
96 static inline void
97 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
99 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
100 stmt);
103 static inline void
104 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
106 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
107 append_pattern_def_seq (stmt_info, stmt);
110 /* Check whether STMT2 is in the same loop or basic block as STMT1.
111 Which of the two applies depends on whether we're currently doing
112 loop-based or basic-block-based vectorization, as determined by
113 the vinfo_for_stmt for STMT1 (which must be defined).
115 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
116 to be defined as well. */
118 static bool
119 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
121 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
122 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
123 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
125 if (!gimple_bb (stmt2))
126 return false;
128 if (loop_vinfo)
130 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
131 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
132 return false;
134 else
136 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
137 || gimple_code (stmt2) == GIMPLE_PHI)
138 return false;
141 gcc_assert (vinfo_for_stmt (stmt2));
142 return true;
145 /* If the LHS of DEF_STMT has a single use, and that statement is
146 in the same loop or basic block, return it. */
148 static gimple
149 vect_single_imm_use (gimple def_stmt)
151 tree lhs = gimple_assign_lhs (def_stmt);
152 use_operand_p use_p;
153 gimple use_stmt;
155 if (!single_imm_use (lhs, &use_p, &use_stmt))
156 return NULL;
158 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
159 return NULL;
161 return use_stmt;
164 /* Check whether NAME, an ssa-name used in USE_STMT,
165 is a result of a type promotion, such that:
166 DEF_STMT: NAME = NOP (name0)
167 If CHECK_SIGN is TRUE, check that either both types are signed or both are
168 unsigned. */
170 static bool
171 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
172 tree *orig_type, gimple *def_stmt, bool *promotion)
174 tree dummy;
175 gimple dummy_gimple;
176 loop_vec_info loop_vinfo;
177 stmt_vec_info stmt_vinfo;
178 tree type = TREE_TYPE (name);
179 tree oprnd0;
180 enum vect_def_type dt;
181 tree def;
182 bb_vec_info bb_vinfo;
184 stmt_vinfo = vinfo_for_stmt (use_stmt);
185 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
186 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
187 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
188 &def, &dt))
189 return false;
191 if (dt != vect_internal_def
192 && dt != vect_external_def && dt != vect_constant_def)
193 return false;
195 if (!*def_stmt)
196 return false;
198 if (!is_gimple_assign (*def_stmt))
199 return false;
201 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
202 return false;
204 oprnd0 = gimple_assign_rhs1 (*def_stmt);
206 *orig_type = TREE_TYPE (oprnd0);
207 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
208 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
209 return false;
211 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
212 *promotion = true;
213 else
214 *promotion = false;
216 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
217 bb_vinfo, &dummy_gimple, &dummy, &dt))
218 return false;
220 return true;
223 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
224 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
226 static tree
227 vect_recog_temp_ssa_var (tree type, gimple stmt)
229 return make_temp_ssa_name (type, stmt, "patt");
232 /* Function vect_recog_dot_prod_pattern
234 Try to find the following pattern:
236 type x_t, y_t;
237 TYPE1 prod;
238 TYPE2 sum = init;
239 loop:
240 sum_0 = phi <init, sum_1>
241 S1 x_t = ...
242 S2 y_t = ...
243 S3 x_T = (TYPE1) x_t;
244 S4 y_T = (TYPE1) y_t;
245 S5 prod = x_T * y_T;
246 [S6 prod = (TYPE2) prod; #optional]
247 S7 sum_1 = prod + sum_0;
249 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
250 same size of 'TYPE1' or bigger. This is a special case of a reduction
251 computation.
253 Input:
255 * STMTS: Contains a stmt from which the pattern search begins. In the
256 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
257 will be detected.
259 Output:
261 * TYPE_IN: The type of the input arguments to the pattern.
263 * TYPE_OUT: The type of the output of this pattern.
265 * Return value: A new stmt that will be used to replace the sequence of
266 stmts that constitute the pattern. In this case it will be:
267 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
269 Note: The dot-prod idiom is a widening reduction pattern that is
270 vectorized without preserving all the intermediate results. It
271 produces only N/2 (widened) results (by summing up pairs of
272 intermediate results) rather than all N results. Therefore, we
273 cannot allow this pattern when we want to get all the results and in
274 the correct order (as is the case when this computation is in an
275 inner-loop nested in an outer-loop that us being vectorized). */
277 static gimple
278 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
279 tree *type_out)
281 gimple stmt, last_stmt = (*stmts)[0];
282 tree oprnd0, oprnd1;
283 tree oprnd00, oprnd01;
284 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
285 tree type, half_type;
286 gimple pattern_stmt;
287 tree prod_type;
288 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
289 struct loop *loop;
290 tree var;
291 bool promotion;
293 if (!loop_info)
294 return NULL;
296 loop = LOOP_VINFO_LOOP (loop_info);
298 /* We don't allow changing the order of the computation in the inner-loop
299 when doing outer-loop vectorization. */
300 if (loop && nested_in_vect_loop_p (loop, last_stmt))
301 return NULL;
303 if (!is_gimple_assign (last_stmt))
304 return NULL;
306 type = gimple_expr_type (last_stmt);
308 /* Look for the following pattern
309 DX = (TYPE1) X;
310 DY = (TYPE1) Y;
311 DPROD = DX * DY;
312 DDPROD = (TYPE2) DPROD;
313 sum_1 = DDPROD + sum_0;
314 In which
315 - DX is double the size of X
316 - DY is double the size of Y
317 - DX, DY, DPROD all have the same type
318 - sum is the same size of DPROD or bigger
319 - sum has been recognized as a reduction variable.
321 This is equivalent to:
322 DPROD = X w* Y; #widen mult
323 sum_1 = DPROD w+ sum_0; #widen summation
325 DPROD = X w* Y; #widen mult
326 sum_1 = DPROD + sum_0; #summation
329 /* Starting from LAST_STMT, follow the defs of its uses in search
330 of the above pattern. */
332 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
333 return NULL;
335 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
337 /* Has been detected as widening-summation? */
339 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
340 type = gimple_expr_type (stmt);
341 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
342 return NULL;
343 oprnd0 = gimple_assign_rhs1 (stmt);
344 oprnd1 = gimple_assign_rhs2 (stmt);
345 half_type = TREE_TYPE (oprnd0);
347 else
349 gimple def_stmt;
351 oprnd0 = gimple_assign_rhs1 (last_stmt);
352 oprnd1 = gimple_assign_rhs2 (last_stmt);
353 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
354 || !types_compatible_p (TREE_TYPE (oprnd1), type))
355 return NULL;
356 stmt = last_stmt;
358 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
359 &promotion)
360 && promotion)
362 stmt = def_stmt;
363 oprnd0 = gimple_assign_rhs1 (stmt);
365 else
366 half_type = type;
369 /* So far so good. Since last_stmt was detected as a (summation) reduction,
370 we know that oprnd1 is the reduction variable (defined by a loop-header
371 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
372 Left to check that oprnd0 is defined by a (widen_)mult_expr */
373 if (TREE_CODE (oprnd0) != SSA_NAME)
374 return NULL;
376 prod_type = half_type;
377 stmt = SSA_NAME_DEF_STMT (oprnd0);
379 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
380 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
381 return NULL;
383 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
384 inside the loop (in case we are analyzing an outer-loop). */
385 if (!is_gimple_assign (stmt))
386 return NULL;
387 stmt_vinfo = vinfo_for_stmt (stmt);
388 gcc_assert (stmt_vinfo);
389 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
390 return NULL;
391 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
392 return NULL;
393 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
395 /* Has been detected as a widening multiplication? */
397 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
398 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
399 return NULL;
400 stmt_vinfo = vinfo_for_stmt (stmt);
401 gcc_assert (stmt_vinfo);
402 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
403 oprnd00 = gimple_assign_rhs1 (stmt);
404 oprnd01 = gimple_assign_rhs2 (stmt);
405 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
406 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
408 else
410 tree half_type0, half_type1;
411 gimple def_stmt;
412 tree oprnd0, oprnd1;
414 oprnd0 = gimple_assign_rhs1 (stmt);
415 oprnd1 = gimple_assign_rhs2 (stmt);
416 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
417 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
418 return NULL;
419 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
420 &promotion)
421 || !promotion)
422 return NULL;
423 oprnd00 = gimple_assign_rhs1 (def_stmt);
424 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
425 &promotion)
426 || !promotion)
427 return NULL;
428 oprnd01 = gimple_assign_rhs1 (def_stmt);
429 if (!types_compatible_p (half_type0, half_type1))
430 return NULL;
431 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
432 return NULL;
435 half_type = TREE_TYPE (oprnd00);
436 *type_in = half_type;
437 *type_out = type;
439 /* Pattern detected. Create a stmt to be used to replace the pattern: */
440 var = vect_recog_temp_ssa_var (type, NULL);
441 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
442 oprnd00, oprnd01, oprnd1);
444 if (dump_enabled_p ())
446 dump_printf_loc (MSG_NOTE, vect_location,
447 "vect_recog_dot_prod_pattern: detected: ");
448 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
449 dump_printf (MSG_NOTE, "\n");
452 return pattern_stmt;
456 /* Function vect_recog_sad_pattern
458 Try to find the following Sum of Absolute Difference (SAD) pattern:
460 type x_t, y_t;
461 signed TYPE1 diff, abs_diff;
462 TYPE2 sum = init;
463 loop:
464 sum_0 = phi <init, sum_1>
465 S1 x_t = ...
466 S2 y_t = ...
467 S3 x_T = (TYPE1) x_t;
468 S4 y_T = (TYPE1) y_t;
469 S5 diff = x_T - y_T;
470 S6 abs_diff = ABS_EXPR <diff>;
471 [S7 abs_diff = (TYPE2) abs_diff; #optional]
472 S8 sum_1 = abs_diff + sum_0;
474 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
475 same size of 'TYPE1' or bigger. This is a special case of a reduction
476 computation.
478 Input:
480 * STMTS: Contains a stmt from which the pattern search begins. In the
481 example, when this function is called with S8, the pattern
482 {S3,S4,S5,S6,S7,S8} will be detected.
484 Output:
486 * TYPE_IN: The type of the input arguments to the pattern.
488 * TYPE_OUT: The type of the output of this pattern.
490 * Return value: A new stmt that will be used to replace the sequence of
491 stmts that constitute the pattern. In this case it will be:
492 SAD_EXPR <x_t, y_t, sum_0>
495 static gimple
496 vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
497 tree *type_out)
499 gimple last_stmt = (*stmts)[0];
500 tree sad_oprnd0, sad_oprnd1;
501 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
502 tree half_type;
503 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
504 struct loop *loop;
505 bool promotion;
507 if (!loop_info)
508 return NULL;
510 loop = LOOP_VINFO_LOOP (loop_info);
512 /* We don't allow changing the order of the computation in the inner-loop
513 when doing outer-loop vectorization. */
514 if (loop && nested_in_vect_loop_p (loop, last_stmt))
515 return NULL;
517 if (!is_gimple_assign (last_stmt))
518 return NULL;
520 tree sum_type = gimple_expr_type (last_stmt);
522 /* Look for the following pattern
523 DX = (TYPE1) X;
524 DY = (TYPE1) Y;
525 DDIFF = DX - DY;
526 DAD = ABS_EXPR <DDIFF>;
527 DDPROD = (TYPE2) DPROD;
528 sum_1 = DAD + sum_0;
529 In which
530 - DX is at least double the size of X
531 - DY is at least double the size of Y
532 - DX, DY, DDIFF, DAD all have the same type
533 - sum is the same size of DAD or bigger
534 - sum has been recognized as a reduction variable.
536 This is equivalent to:
537 DDIFF = X w- Y; #widen sub
538 DAD = ABS_EXPR <DDIFF>;
539 sum_1 = DAD w+ sum_0; #widen summation
541 DDIFF = X w- Y; #widen sub
542 DAD = ABS_EXPR <DDIFF>;
543 sum_1 = DAD + sum_0; #summation
546 /* Starting from LAST_STMT, follow the defs of its uses in search
547 of the above pattern. */
549 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
550 return NULL;
552 tree plus_oprnd0, plus_oprnd1;
554 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
556 /* Has been detected as widening-summation? */
558 gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
559 sum_type = gimple_expr_type (stmt);
560 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
561 return NULL;
562 plus_oprnd0 = gimple_assign_rhs1 (stmt);
563 plus_oprnd1 = gimple_assign_rhs2 (stmt);
564 half_type = TREE_TYPE (plus_oprnd0);
566 else
568 gimple def_stmt;
570 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
571 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
572 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
573 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
574 return NULL;
576 /* The type conversion could be promotion, demotion,
577 or just signed -> unsigned. */
578 if (type_conversion_p (plus_oprnd0, last_stmt, false,
579 &half_type, &def_stmt, &promotion))
580 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
581 else
582 half_type = sum_type;
585 /* So far so good. Since last_stmt was detected as a (summation) reduction,
586 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
587 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
588 Then check that plus_oprnd0 is defined by an abs_expr. */
590 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
591 return NULL;
593 tree abs_type = half_type;
594 gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
596 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
597 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
598 return NULL;
600 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
601 inside the loop (in case we are analyzing an outer-loop). */
602 if (!is_gimple_assign (abs_stmt))
603 return NULL;
605 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
606 gcc_assert (abs_stmt_vinfo);
607 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
608 return NULL;
609 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
610 return NULL;
612 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
613 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
614 return NULL;
615 if (TYPE_UNSIGNED (abs_type))
616 return NULL;
618 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
620 if (TREE_CODE (abs_oprnd) != SSA_NAME)
621 return NULL;
623 gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
625 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
626 if (!gimple_bb (diff_stmt)
627 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
628 return NULL;
630 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
631 inside the loop (in case we are analyzing an outer-loop). */
632 if (!is_gimple_assign (diff_stmt))
633 return NULL;
635 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
636 gcc_assert (diff_stmt_vinfo);
637 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
638 return NULL;
639 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
640 return NULL;
642 tree half_type0, half_type1;
643 gimple def_stmt;
645 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
646 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
648 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
649 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
650 return NULL;
651 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
652 &half_type0, &def_stmt, &promotion)
653 || !promotion)
654 return NULL;
655 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
657 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
658 &half_type1, &def_stmt, &promotion)
659 || !promotion)
660 return NULL;
661 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
663 if (!types_compatible_p (half_type0, half_type1))
664 return NULL;
665 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
666 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
667 return NULL;
669 *type_in = TREE_TYPE (sad_oprnd0);
670 *type_out = sum_type;
672 /* Pattern detected. Create a stmt to be used to replace the pattern: */
673 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
674 gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
675 sad_oprnd1, plus_oprnd1);
677 if (dump_enabled_p ())
679 dump_printf_loc (MSG_NOTE, vect_location,
680 "vect_recog_sad_pattern: detected: ");
681 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
682 dump_printf (MSG_NOTE, "\n");
685 return pattern_stmt;
689 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
690 and LSHIFT_EXPR.
692 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
693 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
695 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
696 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
697 that satisfies the above restrictions, we can perform a widening opeartion
698 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
699 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
701 static bool
702 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
703 tree const_oprnd, tree *oprnd,
704 gimple *wstmt, tree type,
705 tree *half_type, gimple def_stmt)
707 tree new_type, new_oprnd;
709 if (code != MULT_EXPR && code != LSHIFT_EXPR)
710 return false;
712 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
713 || (code == LSHIFT_EXPR
714 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
715 != 1))
716 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
718 /* CONST_OPRND is a constant of HALF_TYPE. */
719 *oprnd = gimple_assign_rhs1 (def_stmt);
720 return true;
723 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
724 return false;
726 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
727 return false;
729 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
730 a type 2 times bigger than HALF_TYPE. */
731 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
732 TYPE_UNSIGNED (type));
733 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
734 || (code == LSHIFT_EXPR
735 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
736 return false;
738 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
739 *oprnd = gimple_assign_rhs1 (def_stmt);
740 new_oprnd = make_ssa_name (new_type);
741 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
742 *oprnd = new_oprnd;
744 *half_type = new_type;
745 return true;
749 /* Function vect_recog_widen_mult_pattern
751 Try to find the following pattern:
753 type1 a_t;
754 type2 b_t;
755 TYPE a_T, b_T, prod_T;
757 S1 a_t = ;
758 S2 b_t = ;
759 S3 a_T = (TYPE) a_t;
760 S4 b_T = (TYPE) b_t;
761 S5 prod_T = a_T * b_T;
763 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
765 Also detect unsigned cases:
767 unsigned type1 a_t;
768 unsigned type2 b_t;
769 unsigned TYPE u_prod_T;
770 TYPE a_T, b_T, prod_T;
772 S1 a_t = ;
773 S2 b_t = ;
774 S3 a_T = (TYPE) a_t;
775 S4 b_T = (TYPE) b_t;
776 S5 prod_T = a_T * b_T;
777 S6 u_prod_T = (unsigned TYPE) prod_T;
779 and multiplication by constants:
781 type a_t;
782 TYPE a_T, prod_T;
784 S1 a_t = ;
785 S3 a_T = (TYPE) a_t;
786 S5 prod_T = a_T * CONST;
788 A special case of multiplication by constants is when 'TYPE' is 4 times
789 bigger than 'type', but CONST fits an intermediate type 2 times smaller
790 than 'TYPE'. In that case we create an additional pattern stmt for S3
791 to create a variable of the intermediate type, and perform widen-mult
792 on the intermediate type as well:
794 type a_t;
795 interm_type a_it;
796 TYPE a_T, prod_T, prod_T';
798 S1 a_t = ;
799 S3 a_T = (TYPE) a_t;
800 '--> a_it = (interm_type) a_t;
801 S5 prod_T = a_T * CONST;
802 '--> prod_T' = a_it w* CONST;
804 Input/Output:
806 * STMTS: Contains a stmt from which the pattern search begins. In the
807 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
808 is detected. In case of unsigned widen-mult, the original stmt (S5) is
809 replaced with S6 in STMTS. In case of multiplication by a constant
810 of an intermediate type (the last case above), STMTS also contains S3
811 (inserted before S5).
813 Output:
815 * TYPE_IN: The type of the input arguments to the pattern.
817 * TYPE_OUT: The type of the output of this pattern.
819 * Return value: A new stmt that will be used to replace the sequence of
820 stmts that constitute the pattern. In this case it will be:
821 WIDEN_MULT <a_t, b_t>
822 If the result of WIDEN_MULT needs to be converted to a larger type, the
823 returned stmt will be this type conversion stmt.
826 static gimple
827 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
828 tree *type_in, tree *type_out)
830 gimple last_stmt = stmts->pop ();
831 gimple def_stmt0, def_stmt1;
832 tree oprnd0, oprnd1;
833 tree type, half_type0, half_type1;
834 gimple new_stmt = NULL, pattern_stmt = NULL;
835 tree vectype, vecitype;
836 tree var;
837 enum tree_code dummy_code;
838 int dummy_int;
839 vec<tree> dummy_vec;
840 bool op1_ok;
841 bool promotion;
843 if (!is_gimple_assign (last_stmt))
844 return NULL;
846 type = gimple_expr_type (last_stmt);
848 /* Starting from LAST_STMT, follow the defs of its uses in search
849 of the above pattern. */
851 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
852 return NULL;
854 oprnd0 = gimple_assign_rhs1 (last_stmt);
855 oprnd1 = gimple_assign_rhs2 (last_stmt);
856 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
857 || !types_compatible_p (TREE_TYPE (oprnd1), type))
858 return NULL;
860 /* Check argument 0. */
861 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
862 &promotion)
863 || !promotion)
864 return NULL;
865 /* Check argument 1. */
866 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
867 &def_stmt1, &promotion);
869 if (op1_ok && promotion)
871 oprnd0 = gimple_assign_rhs1 (def_stmt0);
872 oprnd1 = gimple_assign_rhs1 (def_stmt1);
874 else
876 if (TREE_CODE (oprnd1) == INTEGER_CST
877 && TREE_CODE (half_type0) == INTEGER_TYPE
878 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
879 &oprnd0, &new_stmt, type,
880 &half_type0, def_stmt0))
882 half_type1 = half_type0;
883 oprnd1 = fold_convert (half_type1, oprnd1);
885 else
886 return NULL;
889 /* If the two arguments have different sizes, convert the one with
890 the smaller type into the larger type. */
891 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
893 /* If we already used up the single-stmt slot give up. */
894 if (new_stmt)
895 return NULL;
897 tree* oprnd = NULL;
898 gimple def_stmt = NULL;
900 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
902 def_stmt = def_stmt0;
903 half_type0 = half_type1;
904 oprnd = &oprnd0;
906 else
908 def_stmt = def_stmt1;
909 half_type1 = half_type0;
910 oprnd = &oprnd1;
913 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
914 tree new_oprnd = make_ssa_name (half_type0);
915 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
916 *oprnd = new_oprnd;
919 /* Handle unsigned case. Look for
920 S6 u_prod_T = (unsigned TYPE) prod_T;
921 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
922 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
924 gimple use_stmt;
925 tree use_lhs;
926 tree use_type;
928 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
929 return NULL;
931 use_stmt = vect_single_imm_use (last_stmt);
932 if (!use_stmt || !is_gimple_assign (use_stmt)
933 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
934 return NULL;
936 use_lhs = gimple_assign_lhs (use_stmt);
937 use_type = TREE_TYPE (use_lhs);
938 if (!INTEGRAL_TYPE_P (use_type)
939 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
940 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
941 return NULL;
943 type = use_type;
944 last_stmt = use_stmt;
947 if (!types_compatible_p (half_type0, half_type1))
948 return NULL;
950 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
951 to get an intermediate result of type ITYPE. In this case we need
952 to build a statement to convert this intermediate result to type TYPE. */
953 tree itype = type;
954 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
955 itype = build_nonstandard_integer_type
956 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
957 TYPE_UNSIGNED (type));
959 /* Pattern detected. */
960 if (dump_enabled_p ())
961 dump_printf_loc (MSG_NOTE, vect_location,
962 "vect_recog_widen_mult_pattern: detected:\n");
964 /* Check target support */
965 vectype = get_vectype_for_scalar_type (half_type0);
966 vecitype = get_vectype_for_scalar_type (itype);
967 if (!vectype
968 || !vecitype
969 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
970 vecitype, vectype,
971 &dummy_code, &dummy_code,
972 &dummy_int, &dummy_vec))
973 return NULL;
975 *type_in = vectype;
976 *type_out = get_vectype_for_scalar_type (type);
978 /* Pattern supported. Create a stmt to be used to replace the pattern: */
979 var = vect_recog_temp_ssa_var (itype, NULL);
980 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
982 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
983 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
985 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
987 /* If the original two operands have different sizes, we may need to convert
988 the smaller one into the larget type. If this is the case, at this point
989 the new stmt is already built. */
990 if (new_stmt)
992 append_pattern_def_seq (stmt_vinfo, new_stmt);
993 stmt_vec_info new_stmt_info
994 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
995 set_vinfo_for_stmt (new_stmt, new_stmt_info);
996 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
999 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1000 the result of the widen-mult operation into type TYPE. */
1001 if (itype != type)
1003 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1004 stmt_vec_info pattern_stmt_info
1005 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
1006 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1007 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1008 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1009 NOP_EXPR,
1010 gimple_assign_lhs (pattern_stmt));
1013 if (dump_enabled_p ())
1014 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1016 stmts->safe_push (last_stmt);
1017 return pattern_stmt;
1021 /* Function vect_recog_pow_pattern
1023 Try to find the following pattern:
1025 x = POW (y, N);
1027 with POW being one of pow, powf, powi, powif and N being
1028 either 2 or 0.5.
1030 Input:
1032 * LAST_STMT: A stmt from which the pattern search begins.
1034 Output:
1036 * TYPE_IN: The type of the input arguments to the pattern.
1038 * TYPE_OUT: The type of the output of this pattern.
1040 * Return value: A new stmt that will be used to replace the sequence of
1041 stmts that constitute the pattern. In this case it will be:
1042 x = x * x
1044 x = sqrt (x)
1047 static gimple
1048 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
1049 tree *type_out)
1051 gimple last_stmt = (*stmts)[0];
1052 tree fn, base, exp = NULL;
1053 gimple stmt;
1054 tree var;
1056 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1057 return NULL;
1059 fn = gimple_call_fndecl (last_stmt);
1060 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
1061 return NULL;
1063 switch (DECL_FUNCTION_CODE (fn))
1065 case BUILT_IN_POWIF:
1066 case BUILT_IN_POWI:
1067 case BUILT_IN_POWF:
1068 case BUILT_IN_POW:
1069 base = gimple_call_arg (last_stmt, 0);
1070 exp = gimple_call_arg (last_stmt, 1);
1071 if (TREE_CODE (exp) != REAL_CST
1072 && TREE_CODE (exp) != INTEGER_CST)
1073 return NULL;
1074 break;
1076 default:
1077 return NULL;
1080 /* We now have a pow or powi builtin function call with a constant
1081 exponent. */
1083 *type_out = NULL_TREE;
1085 /* Catch squaring. */
1086 if ((tree_fits_shwi_p (exp)
1087 && tree_to_shwi (exp) == 2)
1088 || (TREE_CODE (exp) == REAL_CST
1089 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
1091 *type_in = TREE_TYPE (base);
1093 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1094 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1095 return stmt;
1098 /* Catch square root. */
1099 if (TREE_CODE (exp) == REAL_CST
1100 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
1102 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
1103 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1104 if (*type_in)
1106 gcall *stmt = gimple_build_call (newfn, 1, base);
1107 if (vectorizable_function (stmt, *type_in, *type_in)
1108 != NULL_TREE)
1110 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1111 gimple_call_set_lhs (stmt, var);
1112 return stmt;
1117 return NULL;
1121 /* Function vect_recog_widen_sum_pattern
1123 Try to find the following pattern:
1125 type x_t;
1126 TYPE x_T, sum = init;
1127 loop:
1128 sum_0 = phi <init, sum_1>
1129 S1 x_t = *p;
1130 S2 x_T = (TYPE) x_t;
1131 S3 sum_1 = x_T + sum_0;
1133 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1134 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1135 a special case of a reduction computation.
1137 Input:
1139 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1140 when this function is called with S3, the pattern {S2,S3} will be detected.
1142 Output:
1144 * TYPE_IN: The type of the input arguments to the pattern.
1146 * TYPE_OUT: The type of the output of this pattern.
1148 * Return value: A new stmt that will be used to replace the sequence of
1149 stmts that constitute the pattern. In this case it will be:
1150 WIDEN_SUM <x_t, sum_0>
1152 Note: The widening-sum idiom is a widening reduction pattern that is
1153 vectorized without preserving all the intermediate results. It
1154 produces only N/2 (widened) results (by summing up pairs of
1155 intermediate results) rather than all N results. Therefore, we
1156 cannot allow this pattern when we want to get all the results and in
1157 the correct order (as is the case when this computation is in an
1158 inner-loop nested in an outer-loop that us being vectorized). */
1160 static gimple
1161 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
1162 tree *type_out)
1164 gimple stmt, last_stmt = (*stmts)[0];
1165 tree oprnd0, oprnd1;
1166 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1167 tree type, half_type;
1168 gimple pattern_stmt;
1169 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1170 struct loop *loop;
1171 tree var;
1172 bool promotion;
1174 if (!loop_info)
1175 return NULL;
1177 loop = LOOP_VINFO_LOOP (loop_info);
1179 /* We don't allow changing the order of the computation in the inner-loop
1180 when doing outer-loop vectorization. */
1181 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1182 return NULL;
1184 if (!is_gimple_assign (last_stmt))
1185 return NULL;
1187 type = gimple_expr_type (last_stmt);
1189 /* Look for the following pattern
1190 DX = (TYPE) X;
1191 sum_1 = DX + sum_0;
1192 In which DX is at least double the size of X, and sum_1 has been
1193 recognized as a reduction variable.
1196 /* Starting from LAST_STMT, follow the defs of its uses in search
1197 of the above pattern. */
1199 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1200 return NULL;
1202 oprnd0 = gimple_assign_rhs1 (last_stmt);
1203 oprnd1 = gimple_assign_rhs2 (last_stmt);
1204 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1205 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1206 return NULL;
1208 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1209 we know that oprnd1 is the reduction variable (defined by a loop-header
1210 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1211 Left to check that oprnd0 is defined by a cast from type 'type' to type
1212 'TYPE'. */
1214 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1215 &promotion)
1216 || !promotion)
1217 return NULL;
1219 oprnd0 = gimple_assign_rhs1 (stmt);
1220 *type_in = half_type;
1221 *type_out = type;
1223 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1224 var = vect_recog_temp_ssa_var (type, NULL);
1225 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1227 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_recog_widen_sum_pattern: detected: ");
1231 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1232 dump_printf (MSG_NOTE, "\n");
1235 return pattern_stmt;
1239 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1241 Input:
1242 STMT - a statement to check.
1243 DEF - we support operations with two operands, one of which is constant.
1244 The other operand can be defined by a demotion operation, or by a
1245 previous statement in a sequence of over-promoted operations. In the
1246 latter case DEF is used to replace that operand. (It is defined by a
1247 pattern statement we created for the previous statement in the
1248 sequence).
1250 Input/output:
1251 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1252 NULL, it's the type of DEF.
1253 STMTS - additional pattern statements. If a pattern statement (type
1254 conversion) is created in this function, its original statement is
1255 added to STMTS.
1257 Output:
1258 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1259 operands to use in the new pattern statement for STMT (will be created
1260 in vect_recog_over_widening_pattern ()).
1261 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1262 statements for STMT: the first one is a type promotion and the second
1263 one is the operation itself. We return the type promotion statement
1264 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1265 the second pattern statement. */
1267 static bool
1268 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1269 tree *op0, tree *op1, gimple *new_def_stmt,
1270 vec<gimple> *stmts)
1272 enum tree_code code;
1273 tree const_oprnd, oprnd;
1274 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1275 gimple def_stmt, new_stmt;
1276 bool first = false;
1277 bool promotion;
/* Initialize the outputs so that every early-exit path leaves them
   well defined for the caller.  */
1279 *op0 = NULL_TREE;
1280 *op1 = NULL_TREE;
1281 *new_def_stmt = NULL;
1283 if (!is_gimple_assign (stmt))
1284 return false;
/* Only shifts and bitwise operations with a constant second operand
   are candidates for demotion here.  */
1286 code = gimple_assign_rhs_code (stmt);
1287 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1288 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1289 return false;
1291 oprnd = gimple_assign_rhs1 (stmt);
1292 const_oprnd = gimple_assign_rhs2 (stmt);
1293 type = gimple_expr_type (stmt);
1295 if (TREE_CODE (oprnd) != SSA_NAME
1296 || TREE_CODE (const_oprnd) != INTEGER_CST)
1297 return false;
1299 /* If oprnd has other uses besides that in stmt we cannot mark it
1300 as being part of a pattern only. */
1301 if (!has_single_use (oprnd))
1302 return false;
1304 /* If we are in the middle of a sequence, we use DEF from a previous
1305 statement. Otherwise, OPRND has to be a result of type promotion. */
1306 if (*new_type)
1308 half_type = *new_type;
1309 oprnd = def;
1311 else
1313 first = true;
1314 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1315 &promotion)
1316 || !promotion
1317 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1318 return false;
1321 /* Can we perform the operation on a smaller type? */
1322 switch (code)
1324 case BIT_IOR_EXPR:
1325 case BIT_XOR_EXPR:
1326 case BIT_AND_EXPR:
1327 if (!int_fits_type_p (const_oprnd, half_type))
1329 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1330 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1331 return false;
1333 interm_type = build_nonstandard_integer_type (
1334 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1335 if (!int_fits_type_p (const_oprnd, interm_type))
1336 return false;
1339 break;
1341 case LSHIFT_EXPR:
1342 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1343 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1344 return false;
1346 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1347 (e.g., if the original value was char, the shift amount is at most 8
1348 if we want to use short). */
1349 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1350 return false;
1352 interm_type = build_nonstandard_integer_type (
1353 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1355 if (!vect_supportable_shift (code, interm_type))
1356 return false;
1358 break;
1360 case RSHIFT_EXPR:
/* Unlike a left shift, a right shift may already be doable directly
   on HALF_TYPE.  */
1361 if (vect_supportable_shift (code, half_type))
1362 break;
1364 /* Try intermediate type - HALF_TYPE is not supported. */
1365 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1366 return false;
1368 interm_type = build_nonstandard_integer_type (
1369 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1371 if (!vect_supportable_shift (code, interm_type))
1372 return false;
1374 break;
1376 default:
1377 gcc_unreachable ();
1380 /* There are four possible cases:
1381 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1382 the first statement in the sequence)
1383 a. The original, HALF_TYPE, is not enough - we replace the promotion
1384 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1385 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1386 promotion.
1387 2. OPRND is defined by a pattern statement we created.
1388 a. Its type is not sufficient for the operation, we create a new stmt:
1389 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1390 this statement in NEW_DEF_STMT, and it is later put in
1391 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1392 b. OPRND is good to use in the new statement. */
1393 if (first)
1395 if (interm_type)
1397 /* Replace the original type conversion HALF_TYPE->TYPE with
1398 HALF_TYPE->INTERM_TYPE. */
1399 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1401 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1402 /* Check if the already created pattern stmt is what we need. */
1403 if (!is_gimple_assign (new_stmt)
1404 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1405 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1406 return false;
1408 stmts->safe_push (def_stmt);
1409 oprnd = gimple_assign_lhs (new_stmt);
1411 else
1413 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1414 oprnd = gimple_assign_rhs1 (def_stmt);
1415 new_oprnd = make_ssa_name (interm_type);
1416 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1417 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1418 stmts->safe_push (def_stmt);
1419 oprnd = new_oprnd;
1422 else
1424 /* Retrieve the operand before the type promotion. */
1425 oprnd = gimple_assign_rhs1 (def_stmt);
1428 else
1430 if (interm_type)
1432 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1433 new_oprnd = make_ssa_name (interm_type);
1434 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1435 oprnd = new_oprnd;
1436 *new_def_stmt = new_stmt;
1439 /* Otherwise, OPRND is already set. */
1442 if (interm_type)
1443 *new_type = interm_type;
1444 else
1445 *new_type = half_type;
1447 *op0 = oprnd;
1448 *op1 = fold_convert (*new_type, const_oprnd);
1450 return true;
1454 /* Try to find a statement or a sequence of statements that can be performed
1455 on a smaller type:
1457 type x_t;
1458 TYPE x_T, res0_T, res1_T;
1459 loop:
1460 S1 x_t = *p;
1461 S2 x_T = (TYPE) x_t;
1462 S3 res0_T = op (x_T, C0);
1463 S4 res1_T = op (res0_T, C1);
1464 S5 ... = () res1_T; - type demotion
1466 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1467 constants.
1468 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1469 be 'type' or some intermediate type. For now, we expect S5 to be a type
1470 demotion operation. We also check that S3 and S4 have only one use. */
1472 static gimple
1473 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1474 tree *type_in, tree *type_out)
1476 gimple stmt = stmts->pop ();
1477 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1478 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1479 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1480 bool first;
1481 tree type = NULL;
1483 first = true;
/* Walk the single-use chain of statements for as long as each statement
   fits a smaller type; each iteration creates the corresponding pattern
   statement.  */
1484 while (1)
1486 if (!vinfo_for_stmt (stmt)
1487 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1488 return NULL;
1490 new_def_stmt = NULL;
1491 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1492 &op0, &op1, &new_def_stmt,
1493 stmts))
1495 if (first)
1496 return NULL;
1497 else
1498 break;
1501 /* STMT can be performed on a smaller type. Check its uses. */
1502 use_stmt = vect_single_imm_use (stmt);
1503 if (!use_stmt || !is_gimple_assign (use_stmt))
1504 return NULL;
1506 /* Create pattern statement for STMT. */
1507 vectype = get_vectype_for_scalar_type (new_type);
1508 if (!vectype)
1509 return NULL;
1511 /* We want to collect all the statements for which we create pattern
1512 statements, except for the case when the last statement in the
1513 sequence doesn't have a corresponding pattern statement. In such
1514 case we associate the last pattern statement with the last statement
1515 in the sequence. Therefore, we only add the original statement to
1516 the list if we know that it is not the last. */
1517 if (prev_stmt)
1518 stmts->safe_push (prev_stmt);
1520 var = vect_recog_temp_ssa_var (new_type, NULL);
1521 pattern_stmt
1522 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1523 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1524 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1526 if (dump_enabled_p ())
1528 dump_printf_loc (MSG_NOTE, vect_location,
1529 "created pattern stmt: ");
1530 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1531 dump_printf (MSG_NOTE, "\n");
1534 type = gimple_expr_type (stmt);
1535 prev_stmt = stmt;
1536 stmt = use_stmt;
1538 first = false;
1541 /* We got a sequence. We expect it to end with a type demotion operation.
1542 Otherwise, we quit (for now). There are three possible cases: the
1543 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1544 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1545 NEW_TYPE differs (we create a new conversion statement). */
1546 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1548 use_lhs = gimple_assign_lhs (use_stmt);
1549 use_type = TREE_TYPE (use_lhs);
1550 /* Support only type demotion or signedness change. */
1551 if (!INTEGRAL_TYPE_P (use_type)
1552 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1553 return NULL;
1555 /* Check that NEW_TYPE is not bigger than the conversion result. */
1556 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1557 return NULL;
1559 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1560 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1562 /* Create NEW_TYPE->USE_TYPE conversion. */
1563 new_oprnd = make_ssa_name (use_type);
1564 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1565 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1567 *type_in = get_vectype_for_scalar_type (new_type);
1568 *type_out = get_vectype_for_scalar_type (use_type);
1570 /* We created a pattern statement for the last statement in the
1571 sequence, so we don't need to associate it with the pattern
1572 statement created for PREV_STMT. Therefore, we add PREV_STMT
1573 to the list in order to mark it later in vect_pattern_recog_1. */
1574 if (prev_stmt)
1575 stmts->safe_push (prev_stmt);
1577 else
1579 if (prev_stmt)
1580 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1581 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1583 *type_in = vectype;
1584 *type_out = NULL_TREE;
1587 stmts->safe_push (use_stmt);
1589 else
1590 /* TODO: support general case, create a conversion to the correct type. */
1591 return NULL;
1593 /* Pattern detected. */
1594 if (dump_enabled_p ())
1596 dump_printf_loc (MSG_NOTE, vect_location,
1597 "vect_recog_over_widening_pattern: detected: ");
1598 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1599 dump_printf (MSG_NOTE, "\n");
1602 return pattern_stmt;
1605 /* Detect widening shift pattern:
1607 type a_t;
1608 TYPE a_T, res_T;
1610 S1 a_t = ;
1611 S2 a_T = (TYPE) a_t;
1612 S3 res_T = a_T << CONST;
1614 where type 'TYPE' is at least double the size of type 'type'.
1616 Also detect cases where the shift result is immediately converted
1617 to another type 'result_type' that is no larger in size than 'TYPE'.
1618 In those cases we perform a widen-shift that directly results in
1619 'result_type', to avoid a possible over-widening situation:
1621 type a_t;
1622 TYPE a_T, res_T;
1623 result_type res_result;
1625 S1 a_t = ;
1626 S2 a_T = (TYPE) a_t;
1627 S3 res_T = a_T << CONST;
1628 S4 res_result = (result_type) res_T;
1629 '--> res_result' = a_t w<< CONST;
1631 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1632 create an additional pattern stmt for S2 to create a variable of an
1633 intermediate type, and perform widen-shift on the intermediate type:
1635 type a_t;
1636 interm_type a_it;
1637 TYPE a_T, res_T, res_T';
1639 S1 a_t = ;
1640 S2 a_T = (TYPE) a_t;
1641 '--> a_it = (interm_type) a_t;
1642 S3 res_T = a_T << CONST;
1643 '--> res_T' = a_it <<* CONST;
1645 Input/Output:
1647 * STMTS: Contains a stmt from which the pattern search begins.
1648 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1649 in STMTS. When an intermediate type is used and a pattern statement is
1650 created for S2, we also put S2 here (before S3).
1652 Output:
1654 * TYPE_IN: The type of the input arguments to the pattern.
1656 * TYPE_OUT: The type of the output of this pattern.
1658 * Return value: A new stmt that will be used to replace the sequence of
1659 stmts that constitute the pattern. In this case it will be:
1660 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1662 static gimple
1663 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1664 tree *type_in, tree *type_out)
1666 gimple last_stmt = stmts->pop ();
1667 gimple def_stmt0;
1668 tree oprnd0, oprnd1;
1669 tree type, half_type0;
1670 gimple pattern_stmt;
1671 tree vectype, vectype_out = NULL_TREE;
1672 tree var;
1673 enum tree_code dummy_code;
1674 int dummy_int;
1675 vec<tree> dummy_vec;
1676 gimple use_stmt;
1677 bool promotion;
1679 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1680 return NULL;
1682 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1683 return NULL;
1685 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1686 return NULL;
1688 oprnd0 = gimple_assign_rhs1 (last_stmt);
1689 oprnd1 = gimple_assign_rhs2 (last_stmt);
1690 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1691 return NULL;
1693 /* Check operand 0: it has to be defined by a type promotion. */
1694 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1695 &promotion)
1696 || !promotion)
1697 return NULL;
1699 /* Check operand 1: has to be positive. We check that it fits the type
1700 in vect_handle_widen_op_by_const (). */
1701 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1702 return NULL;
1704 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1705 type = gimple_expr_type (last_stmt);
1707 /* Check for subsequent conversion to another type. */
1708 use_stmt = vect_single_imm_use (last_stmt);
1709 if (use_stmt && is_gimple_assign (use_stmt)
1710 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1711 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1713 tree use_lhs = gimple_assign_lhs (use_stmt);
1714 tree use_type = TREE_TYPE (use_lhs);
1716 if (INTEGRAL_TYPE_P (use_type)
1717 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1719 last_stmt = use_stmt;
1720 type = use_type;
1724 /* Check if this is a widening operation. */
1725 gimple wstmt = NULL;
1726 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1727 &oprnd0, &wstmt,
1728 type, &half_type0, def_stmt0))
1729 return NULL;
1731 /* Pattern detected. */
1732 if (dump_enabled_p ())
1733 dump_printf_loc (MSG_NOTE, vect_location,
1734 "vect_recog_widen_shift_pattern: detected:\n");
1736 /* Check target support. */
1737 vectype = get_vectype_for_scalar_type (half_type0);
1738 vectype_out = get_vectype_for_scalar_type (type);
1740 if (!vectype
1741 || !vectype_out
1742 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1743 vectype_out, vectype,
1744 &dummy_code, &dummy_code,
1745 &dummy_int, &dummy_vec))
1746 return NULL;
1748 *type_in = vectype;
1749 *type_out = vectype_out;
1751 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1752 var = vect_recog_temp_ssa_var (type, NULL);
1753 pattern_stmt =
1754 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
/* If an intermediate-type promotion stmt WSTMT was created, record it as
   a pattern def with its own stmt_vec_info and the input vector type.  */
1755 if (wstmt)
1757 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1758 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1759 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1760 new_pattern_def_seq (stmt_vinfo, wstmt);
1761 stmt_vec_info new_stmt_info
1762 = new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
1763 set_vinfo_for_stmt (wstmt, new_stmt_info);
1764 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1767 if (dump_enabled_p ())
1768 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1770 stmts->safe_push (last_stmt);
1771 return pattern_stmt;
1774 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1776 type a_t, b_t, c_t;
1778 S0 a_t = b_t r<< c_t;
1780 Input/Output:
1782 * STMTS: Contains a stmt from which the pattern search begins,
1783 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1784 with a sequence:
1786 S1 d_t = -c_t;
1787 S2 e_t = d_t & (B - 1);
1788 S3 f_t = b_t << c_t;
1789 S4 g_t = b_t >> e_t;
1790 S0 a_t = f_t | g_t;
1792 where B is element bitsize of type.
1794 Output:
1796 * TYPE_IN: The type of the input arguments to the pattern.
1798 * TYPE_OUT: The type of the output of this pattern.
1800 * Return value: A new stmt that will be used to replace the rotate
1801 S0 stmt. */
1803 static gimple
1804 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1806 gimple last_stmt = stmts->pop ();
1807 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1808 gimple pattern_stmt, def_stmt;
1809 enum tree_code rhs_code;
1810 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1811 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1812 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1813 enum vect_def_type dt;
1814 optab optab1, optab2;
1815 edge ext_def = NULL;
1817 if (!is_gimple_assign (last_stmt))
1818 return NULL;
1820 rhs_code = gimple_assign_rhs_code (last_stmt);
1821 switch (rhs_code)
1823 case LROTATE_EXPR:
1824 case RROTATE_EXPR:
1825 break;
1826 default:
1827 return NULL;
1830 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1831 return NULL;
1833 lhs = gimple_assign_lhs (last_stmt);
1834 oprnd0 = gimple_assign_rhs1 (last_stmt);
1835 type = TREE_TYPE (oprnd0);
1836 oprnd1 = gimple_assign_rhs2 (last_stmt);
/* Only unsigned full-precision rotates are handled; a signed rotate
   cannot be lowered to a simple shift pair.  */
1837 if (TREE_CODE (oprnd0) != SSA_NAME
1838 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1839 || !INTEGRAL_TYPE_P (type)
1840 || !TYPE_UNSIGNED (type))
1841 return NULL;
1843 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1844 &def, &dt))
1845 return NULL;
1847 if (dt != vect_internal_def
1848 && dt != vect_constant_def
1849 && dt != vect_external_def)
1850 return NULL;
1852 vectype = get_vectype_for_scalar_type (type);
1853 if (vectype == NULL_TREE)
1854 return NULL;
1856 /* If vector/vector or vector/scalar rotate is supported by the target,
1857 don't do anything here. */
1858 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1859 if (optab1
1860 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1861 return NULL;
1863 if (bb_vinfo != NULL || dt != vect_internal_def)
1865 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1866 if (optab2
1867 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1868 return NULL;
1871 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1872 don't do anything here either. */
1873 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1874 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1875 if (!optab1
1876 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1877 || !optab2
1878 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1880 if (bb_vinfo == NULL && dt == vect_internal_def)
1881 return NULL;
1882 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1883 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1884 if (!optab1
1885 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1886 || !optab2
1887 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1888 return NULL;
1891 *type_in = vectype;
1892 *type_out = vectype;
1893 if (*type_in == NULL_TREE)
1894 return NULL;
/* For a loop-external shift amount, prefer inserting the helper stmts on
   the preheader edge so they are computed only once.  */
1896 if (dt == vect_external_def
1897 && TREE_CODE (oprnd1) == SSA_NAME
1898 && loop_vinfo)
1900 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1901 ext_def = loop_preheader_edge (loop);
1902 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1904 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1905 if (bb == NULL
1906 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1907 ext_def = NULL;
/* Reuse OPRND1 directly when it already has the right mode; otherwise
   look through a conversion, or emit one below.  */
1911 def = NULL_TREE;
1912 if (TREE_CODE (oprnd1) == INTEGER_CST
1913 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1914 def = oprnd1;
1915 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1917 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1918 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1919 && TYPE_PRECISION (TREE_TYPE (rhs1))
1920 == TYPE_PRECISION (type))
1921 def = rhs1;
1924 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1925 if (def == NULL_TREE)
1927 def = vect_recog_temp_ssa_var (type, NULL);
1928 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1929 if (ext_def)
1931 basic_block new_bb
1932 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1933 gcc_assert (!new_bb);
1935 else
1936 append_pattern_def_seq (stmt_vinfo, def_stmt);
1938 stype = TREE_TYPE (def);
1940 if (TREE_CODE (def) == INTEGER_CST)
1942 if (!tree_fits_uhwi_p (def)
1943 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1944 || integer_zerop (def))
1945 return NULL;
1946 def2 = build_int_cst (stype,
1947 GET_MODE_PRECISION (TYPE_MODE (type))
1948 - tree_to_uhwi (def));
1950 else
1952 tree vecstype = get_vectype_for_scalar_type (stype);
1953 stmt_vec_info def_stmt_vinfo;
1955 if (vecstype == NULL_TREE)
1956 return NULL;
1957 def2 = vect_recog_temp_ssa_var (stype, NULL);
1958 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1959 if (ext_def)
1961 basic_block new_bb
1962 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1963 gcc_assert (!new_bb);
1965 else
1967 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1968 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1969 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1970 append_pattern_def_seq (stmt_vinfo, def_stmt);
1973 def2 = vect_recog_temp_ssa_var (stype, NULL);
1974 tree mask
1975 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1976 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1977 gimple_assign_lhs (def_stmt), mask);
1978 if (ext_def)
1980 basic_block new_bb
1981 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1982 gcc_assert (!new_bb);
1984 else
1986 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1987 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1988 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1989 append_pattern_def_seq (stmt_vinfo, def_stmt);
/* Emit the two opposing shifts whose bitwise OR implements the rotate.  */
1993 var1 = vect_recog_temp_ssa_var (type, NULL);
1994 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1995 ? LSHIFT_EXPR : RSHIFT_EXPR,
1996 oprnd0, def);
1997 append_pattern_def_seq (stmt_vinfo, def_stmt);
1999 var2 = vect_recog_temp_ssa_var (type, NULL);
2000 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2001 ? RSHIFT_EXPR : LSHIFT_EXPR,
2002 oprnd0, def2);
2003 append_pattern_def_seq (stmt_vinfo, def_stmt);
2005 /* Pattern detected. */
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_NOTE, vect_location,
2008 "vect_recog_rotate_pattern: detected:\n");
2010 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2011 var = vect_recog_temp_ssa_var (type, NULL);
2012 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2014 if (dump_enabled_p ())
2015 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2017 stmts->safe_push (last_stmt);
2018 return pattern_stmt;
2021 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2022 vectorized:
2024 type a_t;
2025 TYPE b_T, res_T;
2027 S1 a_t = ;
2028 S2 b_T = ;
2029 S3 res_T = b_T op a_t;
2031 where type 'TYPE' is a type with different size than 'type',
2032 and op is <<, >> or rotate.
2034 Also detect cases:
2036 type a_t;
2037 TYPE b_T, c_T, res_T;
2039 S0 c_T = ;
2040 S1 a_t = (type) c_T;
2041 S2 b_T = ;
2042 S3 res_T = b_T op a_t;
2044 Input/Output:
2046 * STMTS: Contains a stmt from which the pattern search begins,
2047 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2048 with a shift/rotate which has same type on both operands, in the
2049 second case just b_T op c_T, in the first case with added cast
2050 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2052 Output:
2054 * TYPE_IN: The type of the input arguments to the pattern.
2056 * TYPE_OUT: The type of the output of this pattern.
2058 * Return value: A new stmt that will be used to replace the shift/rotate
2059 S3 stmt. */
2061 static gimple
2062 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2063 tree *type_in, tree *type_out)
2065 gimple last_stmt = stmts->pop ();
2066 tree oprnd0, oprnd1, lhs, var;
2067 gimple pattern_stmt, def_stmt;
2068 enum tree_code rhs_code;
2069 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2070 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2071 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2072 enum vect_def_type dt;
2073 tree def;
  /* Only plain shift/rotate assignments that are not already part of
     another recognized pattern are candidates.  */
2075 if (!is_gimple_assign (last_stmt))
2076 return NULL;
2078 rhs_code = gimple_assign_rhs_code (last_stmt);
2079 switch (rhs_code)
2081 case LSHIFT_EXPR:
2082 case RSHIFT_EXPR:
2083 case LROTATE_EXPR:
2084 case RROTATE_EXPR:
2085 break;
2086 default:
2087 return NULL;
2090 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2091 return NULL;
2093 lhs = gimple_assign_lhs (last_stmt);
2094 oprnd0 = gimple_assign_rhs1 (last_stmt);
2095 oprnd1 = gimple_assign_rhs2 (last_stmt);
  /* Both operands must be SSA_NAMEs; the shift count's mode must differ
     from the shifted operand's (otherwise nothing needs fixing), the
     count's precision must fill its mode, and the result must have the
     same precision as the shifted operand.  */
2096 if (TREE_CODE (oprnd0) != SSA_NAME
2097 || TREE_CODE (oprnd1) != SSA_NAME
2098 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2099 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2100 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2101 || TYPE_PRECISION (TREE_TYPE (lhs))
2102 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2103 return NULL;
  /* The shift count must be defined inside the region being vectorized.  */
2105 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2106 &def, &dt))
2107 return NULL;
2109 if (dt != vect_internal_def)
2110 return NULL;
2112 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2113 *type_out = *type_in;
2114 if (*type_in == NULL_TREE)
2115 return NULL;
2117 def = NULL_TREE;
  /* Second case from the header comment: if the count is a cast from a
     type with the same mode and precision as OPRND0's type, use the
     cast's source operand directly.  */
2118 if (gimple_assign_cast_p (def_stmt))
2120 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2121 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2122 && TYPE_PRECISION (TREE_TYPE (rhs1))
2123 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2124 def = rhs1;
  /* First case: otherwise cast the count to OPRND0's type via a new stmt
     in the pattern def sequence.  */
2127 if (def == NULL_TREE)
2129 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2130 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2131 new_pattern_def_seq (stmt_vinfo, def_stmt);
2134 /* Pattern detected. */
2135 if (dump_enabled_p ())
2136 dump_printf_loc (MSG_NOTE, vect_location,
2137 "vect_recog_vector_vector_shift_pattern: detected:\n");
2139 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2140 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2141 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2143 if (dump_enabled_p ())
2144 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2146 stmts->safe_push (last_stmt);
2147 return pattern_stmt;
2150 /* Detect a signed division by a constant that wouldn't be
2151 otherwise vectorized:
2153 type a_t, b_t;
2155 S1 a_t = b_t / N;
2157 where type 'type' is an integral type and N is a constant.
2159 Similarly handle modulo by a constant:
2161 S4 a_t = b_t % N;
2163 Input/Output:
2165 * STMTS: Contains a stmt from which the pattern search begins,
2166 i.e. the division stmt. S1 is replaced by if N is a power
2167 of two constant and type is signed:
2168 S3 y_t = b_t < 0 ? N - 1 : 0;
2169 S2 x_t = b_t + y_t;
2170 S1' a_t = x_t >> log2 (N);
2172 S4 is replaced if N is a power of two constant and
2173 type is signed by (where *_T temporaries have unsigned type):
2174 S9 y_T = b_t < 0 ? -1U : 0U;
2175 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2176 S7 z_t = (type) z_T;
2177 S6 w_t = b_t + z_t;
2178 S5 x_t = w_t & (N - 1);
2179 S4' a_t = x_t - z_t;
2181 Output:
2183 * TYPE_IN: The type of the input arguments to the pattern.
2185 * TYPE_OUT: The type of the output of this pattern.
2187 * Return value: A new stmt that will be used to replace the division
2188 S1 or modulo S4 stmt. */
2190 static gimple
2191 vect_recog_divmod_pattern (vec<gimple> *stmts,
2192 tree *type_in, tree *type_out)
2194 gimple last_stmt = stmts->pop ();
2195 tree oprnd0, oprnd1, vectype, itype, cond;
2196 gimple pattern_stmt, def_stmt;
2197 enum tree_code rhs_code;
2198 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2199 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2200 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2201 optab optab;
2202 tree q;
2203 int dummy_int, prec;
2204 stmt_vec_info def_stmt_vinfo;
  /* Only TRUNC_DIV/TRUNC_MOD assignments are handled.  */
2206 if (!is_gimple_assign (last_stmt))
2207 return NULL;
2209 rhs_code = gimple_assign_rhs_code (last_stmt);
2210 switch (rhs_code)
2212 case TRUNC_DIV_EXPR:
2213 case TRUNC_MOD_EXPR:
2214 break;
2215 default:
2216 return NULL;
2219 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2220 return NULL;
  /* The dividend must be an SSA_NAME of a mode-filling integer type and
     the divisor an integer constant.  */
2222 oprnd0 = gimple_assign_rhs1 (last_stmt);
2223 oprnd1 = gimple_assign_rhs2 (last_stmt);
2224 itype = TREE_TYPE (oprnd0);
2225 if (TREE_CODE (oprnd0) != SSA_NAME
2226 || TREE_CODE (oprnd1) != INTEGER_CST
2227 || TREE_CODE (itype) != INTEGER_TYPE
2228 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2229 return NULL;
2231 vectype = get_vectype_for_scalar_type (itype);
2232 if (vectype == NULL_TREE)
2233 return NULL;
2235 /* If the target can handle vectorized division or modulo natively,
2236 don't attempt to optimize this. */
2237 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2238 if (optab != unknown_optab)
2240 machine_mode vec_mode = TYPE_MODE (vectype);
2241 int icode = (int) optab_handler (optab, vec_mode);
2242 if (icode != CODE_FOR_nothing)
2243 return NULL;
2246 prec = TYPE_PRECISION (itype);
  /* Positive power-of-two divisor of a signed type: expand with the
     shift/mask sequences S1'..S4' described in the header comment.  */
2247 if (integer_pow2p (oprnd1))
2249 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2250 return NULL;
2252 /* Pattern detected. */
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_NOTE, vect_location,
2255 "vect_recog_divmod_pattern: detected:\n")
2257 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2258 build_int_cst (itype, 0));
2259 if (rhs_code == TRUNC_DIV_EXPR)
2261 tree var = vect_recog_temp_ssa_var (itype, NULL);
2262 tree shift;
2263 def_stmt
2264 = gimple_build_assign (var, COND_EXPR, cond,
2265 fold_build2 (MINUS_EXPR, itype, oprnd1,
2266 build_int_cst (itype, 1)),
2267 build_int_cst (itype, 0));
2268 new_pattern_def_seq (stmt_vinfo, def_stmt);
2269 var = vect_recog_temp_ssa_var (itype, NULL);
2270 def_stmt
2271 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2272 gimple_assign_lhs (def_stmt));
2273 append_pattern_def_seq (stmt_vinfo, def_stmt);
2275 shift = build_int_cst (itype, tree_log2 (oprnd1));
2276 pattern_stmt
2277 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2278 RSHIFT_EXPR, var, shift);
2280 else
2282 tree signmask;
2283 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2284 if (compare_tree_int (oprnd1, 2) == 0)
2286 signmask = vect_recog_temp_ssa_var (itype, NULL);
2287 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2288 build_int_cst (itype, 1),
2289 build_int_cst (itype, 0));
2290 append_pattern_def_seq (stmt_vinfo, def_stmt);
2292 else
2294 tree utype
2295 = build_nonstandard_integer_type (prec, 1);
2296 tree vecutype = get_vectype_for_scalar_type (utype);
2297 tree shift
2298 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2299 - tree_log2 (oprnd1));
2300 tree var = vect_recog_temp_ssa_var (utype, NULL);
2302 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2303 build_int_cst (utype, -1),
2304 build_int_cst (utype, 0));
2305 def_stmt_vinfo
2306 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2307 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2308 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2309 append_pattern_def_seq (stmt_vinfo, def_stmt);
2310 var = vect_recog_temp_ssa_var (utype, NULL);
2311 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2312 gimple_assign_lhs (def_stmt),
2313 shift);
2314 def_stmt_vinfo
2315 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2316 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2317 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2318 append_pattern_def_seq (stmt_vinfo, def_stmt);
2319 signmask = vect_recog_temp_ssa_var (itype, NULL);
2320 def_stmt
2321 = gimple_build_assign (signmask, NOP_EXPR, var);
2322 append_pattern_def_seq (stmt_vinfo, def_stmt);
2324 def_stmt
2325 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2326 PLUS_EXPR, oprnd0, signmask);
2327 append_pattern_def_seq (stmt_vinfo, def_stmt);
2328 def_stmt
2329 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2330 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2331 fold_build2 (MINUS_EXPR, itype, oprnd1,
2332 build_int_cst (itype, 1)));
2333 append_pattern_def_seq (stmt_vinfo, def_stmt);
2335 pattern_stmt
2336 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2337 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2338 signmask);
2341 if (dump_enabled_p ())
2342 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2345 stmts->safe_push (last_stmt);
2347 *type_in = vectype;
2348 *type_out = vectype;
2349 return pattern_stmt;
  /* General path: the multiplier must fit in a HOST_WIDE_INT and the
     target must provide a vector highpart multiply.  */
2352 if (prec > HOST_BITS_PER_WIDE_INT
2353 || integer_zerop (oprnd1))
2354 return NULL;
2356 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2357 return NULL;
2359 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  /* Unsigned division: multiply by the constant chosen by
     choose_multiplier, then shift.  */
2361 if (TYPE_UNSIGNED (itype))
2363 unsigned HOST_WIDE_INT mh, ml;
2364 int pre_shift, post_shift;
2365 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2366 & GET_MODE_MASK (TYPE_MODE (itype)));
2367 tree t1, t2, t3, t4;
2369 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2370 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2371 return NULL;
2373 /* Find a suitable multiplier and right shift count
2374 instead of multiplying with D. */
2375 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2377 /* If the suggested multiplier is more than SIZE bits, we can do better
2378 for even divisors, using an initial right shift. */
2379 if (mh != 0 && (d & 1) == 0)
2381 pre_shift = floor_log2 (d & -d);
2382 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2383 &ml, &post_shift, &dummy_int);
2384 gcc_assert (!mh);
2386 else
2387 pre_shift = 0;
2389 if (mh != 0)
2391 if (post_shift - 1 >= prec)
2392 return NULL;
2394 /* t1 = oprnd0 h* ml;
2395 t2 = oprnd0 - t1;
2396 t3 = t2 >> 1;
2397 t4 = t1 + t3;
2398 q = t4 >> (post_shift - 1); */
2399 t1 = vect_recog_temp_ssa_var (itype, NULL);
2400 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2401 build_int_cst (itype, ml));
2402 append_pattern_def_seq (stmt_vinfo, def_stmt);
2404 t2 = vect_recog_temp_ssa_var (itype, NULL);
2405 def_stmt
2406 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2407 append_pattern_def_seq (stmt_vinfo, def_stmt);
2409 t3 = vect_recog_temp_ssa_var (itype, NULL);
2410 def_stmt
2411 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2412 append_pattern_def_seq (stmt_vinfo, def_stmt);
2414 t4 = vect_recog_temp_ssa_var (itype, NULL);
2415 def_stmt
2416 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2418 if (post_shift != 1)
2420 append_pattern_def_seq (stmt_vinfo, def_stmt);
2422 q = vect_recog_temp_ssa_var (itype, NULL);
2423 pattern_stmt
2424 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2425 build_int_cst (itype, post_shift - 1));
2427 else
2429 q = t4;
2430 pattern_stmt = def_stmt;
2433 else
2435 if (pre_shift >= prec || post_shift >= prec)
2436 return NULL;
2438 /* t1 = oprnd0 >> pre_shift;
2439 t2 = t1 h* ml;
2440 q = t2 >> post_shift; */
2441 if (pre_shift)
2443 t1 = vect_recog_temp_ssa_var (itype, NULL);
2444 def_stmt
2445 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2446 build_int_cst (NULL, pre_shift));
2447 append_pattern_def_seq (stmt_vinfo, def_stmt);
2449 else
2450 t1 = oprnd0;
2452 t2 = vect_recog_temp_ssa_var (itype, NULL);
2453 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2454 build_int_cst (itype, ml));
2456 if (post_shift)
2458 append_pattern_def_seq (stmt_vinfo, def_stmt);
2460 q = vect_recog_temp_ssa_var (itype, NULL);
2461 def_stmt
2462 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2463 build_int_cst (itype, post_shift));
2465 else
2466 q = t2;
2468 pattern_stmt = def_stmt;
  /* Signed division: highpart multiply, optional add and shift, then
     correct by the sign of OPRND0 (or a VRP-known constant sign).  */
2471 else
2473 unsigned HOST_WIDE_INT ml;
2474 int post_shift;
2475 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2476 unsigned HOST_WIDE_INT abs_d;
2477 bool add = false;
2478 tree t1, t2, t3, t4;
2480 /* Give up for -1. */
2481 if (d == -1)
2482 return NULL;
2484 /* Since d might be INT_MIN, we have to cast to
2485 unsigned HOST_WIDE_INT before negating to avoid
2486 undefined signed overflow. */
2487 abs_d = (d >= 0
2488 ? (unsigned HOST_WIDE_INT) d
2489 : - (unsigned HOST_WIDE_INT) d);
2491 /* n rem d = n rem -d */
2492 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2494 d = abs_d;
2495 oprnd1 = build_int_cst (itype, abs_d);
2497 else if (HOST_BITS_PER_WIDE_INT >= prec
2498 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2499 /* This case is not handled correctly below. */
2500 return NULL;
2502 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2503 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2505 add = true;
2506 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2508 if (post_shift >= prec)
2509 return NULL;
2511 /* t1 = oprnd0 h* ml; */
2512 t1 = vect_recog_temp_ssa_var (itype, NULL);
2513 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2514 build_int_cst (itype, ml));
2516 if (add)
2518 /* t2 = t1 + oprnd0; */
2519 append_pattern_def_seq (stmt_vinfo, def_stmt);
2520 t2 = vect_recog_temp_ssa_var (itype, NULL);
2521 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2523 else
2524 t2 = t1;
2526 if (post_shift)
2528 /* t3 = t2 >> post_shift; */
2529 append_pattern_def_seq (stmt_vinfo, def_stmt);
2530 t3 = vect_recog_temp_ssa_var (itype, NULL);
2531 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2532 build_int_cst (itype, post_shift));
2534 else
2535 t3 = t2;
  /* Use range info, when available, to avoid materializing the sign of
     OPRND0 at runtime.  */
2537 wide_int oprnd0_min, oprnd0_max;
2538 int msb = 1;
2539 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2541 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2542 msb = 0;
2543 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2544 msb = -1;
2547 if (msb == 0 && d >= 0)
2549 /* q = t3; */
2550 q = t3;
2551 pattern_stmt = def_stmt;
2553 else
2555 /* t4 = oprnd0 >> (prec - 1);
2556 or if we know from VRP that oprnd0 >= 0
2557 t4 = 0;
2558 or if we know from VRP that oprnd0 < 0
2559 t4 = -1; */
2560 append_pattern_def_seq (stmt_vinfo, def_stmt);
2561 t4 = vect_recog_temp_ssa_var (itype, NULL);
2562 if (msb != 1)
2563 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2564 build_int_cst (itype, msb));
2565 else
2566 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2567 build_int_cst (itype, prec - 1));
2568 append_pattern_def_seq (stmt_vinfo, def_stmt);
2570 /* q = t3 - t4; or q = t4 - t3; */
2571 q = vect_recog_temp_ssa_var (itype, NULL);
2572 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2573 d < 0 ? t3 : t4);
2577 if (rhs_code == TRUNC_MOD_EXPR)
2579 tree r, t1;
2581 /* We divided. Now finish by:
2582 t1 = q * oprnd1;
2583 r = oprnd0 - t1; */
2584 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2586 t1 = vect_recog_temp_ssa_var (itype, NULL);
2587 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2588 append_pattern_def_seq (stmt_vinfo, def_stmt);
2590 r = vect_recog_temp_ssa_var (itype, NULL);
2591 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2594 /* Pattern detected. */
2595 if (dump_enabled_p ())
2597 dump_printf_loc (MSG_NOTE, vect_location,
2598 "vect_recog_divmod_pattern: detected: ");
2599 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2600 dump_printf (MSG_NOTE, "\n");
2603 stmts->safe_push (last_stmt);
2605 *type_in = vectype;
2606 *type_out = vectype;
2607 return pattern_stmt;
2610 /* Function vect_recog_mixed_size_cond_pattern
2612 Try to find the following pattern:
2614 type x_t, y_t;
2615 TYPE a_T, b_T, c_T;
2616 loop:
2617 S1 a_T = x_t CMP y_t ? b_T : c_T;
2619 where type 'TYPE' is an integral type which has different size
2620 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2621 than 'type', the constants need to fit into an integer type
2622 with the same width as 'type') or results of conversion from 'type'.
2624 Input:
2626 * LAST_STMT: A stmt from which the pattern search begins.
2628 Output:
2630 * TYPE_IN: The type of the input arguments to the pattern.
2632 * TYPE_OUT: The type of the output of this pattern.
2634 * Return value: A new stmt that will be used to replace the pattern.
2635 Additionally a def_stmt is added.
2637 a_it = x_t CMP y_t ? b_it : c_it;
2638 a_T = (TYPE) a_it; */
2640 static gimple
2641 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2642 tree *type_out)
2644 gimple last_stmt = (*stmts)[0];
2645 tree cond_expr, then_clause, else_clause;
2646 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2647 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2648 machine_mode cmpmode;
2649 gimple pattern_stmt, def_stmt;
2650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2652 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2653 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2654 bool promotion;
2655 tree comp_scalar_type;
  /* Only an internal-def COND_EXPR assignment with an embedded comparison
     qualifies.  */
2657 if (!is_gimple_assign (last_stmt)
2658 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2659 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2660 return NULL;
2662 cond_expr = gimple_assign_rhs1 (last_stmt);
2663 then_clause = gimple_assign_rhs2 (last_stmt);
2664 else_clause = gimple_assign_rhs3 (last_stmt);
2666 if (!COMPARISON_CLASS_P (cond_expr))
2667 return NULL;
2669 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2670 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2671 if (comp_vectype == NULL_TREE)
2672 return NULL;
2674 type = gimple_expr_type (last_stmt);
2675 if (types_compatible_p (type, comp_scalar_type)
2676 || ((TREE_CODE (then_clause) != INTEGER_CST
2677 || TREE_CODE (else_clause) != INTEGER_CST)
2678 && !INTEGRAL_TYPE_P (comp_scalar_type))
2679 || !INTEGRAL_TYPE_P (type))
2680 return NULL;
  /* Non-constant then/else clauses must be conversions from the
     comparison's scalar type.  */
2682 if ((TREE_CODE (then_clause) != INTEGER_CST
2683 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2684 &def_stmt0, &promotion))
2685 || (TREE_CODE (else_clause) != INTEGER_CST
2686 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2687 &def_stmt1, &promotion)))
2688 return NULL;
2690 if (orig_type0 && orig_type1
2691 && !types_compatible_p (orig_type0, orig_type1))
2692 return NULL;
2694 if (orig_type0)
2696 if (!types_compatible_p (orig_type0, comp_scalar_type))
2697 return NULL;
2698 then_clause = gimple_assign_rhs1 (def_stmt0);
2699 itype = orig_type0;
2702 if (orig_type1)
2704 if (!types_compatible_p (orig_type1, comp_scalar_type))
2705 return NULL;
2706 else_clause = gimple_assign_rhs1 (def_stmt1);
2707 itype = orig_type1;
2710 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
  /* If result and comparison already have the same width there is
     nothing to fix up.  */
2712 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2713 return NULL;
2715 vectype = get_vectype_for_scalar_type (type);
2716 if (vectype == NULL_TREE)
2717 return NULL;
  /* If the target handles the mixed-size VEC_COND_EXPR directly, leave
     the statement alone.  */
2719 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2720 return NULL;
2722 if (itype == NULL_TREE)
2723 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2724 TYPE_UNSIGNED (type));
2726 if (itype == NULL_TREE
2727 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2728 return NULL;
2730 vecitype = get_vectype_for_scalar_type (itype);
2731 if (vecitype == NULL_TREE)
2732 return NULL;
2734 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2735 return NULL;
  /* When widening, constant clauses must be representable in ITYPE.  */
2737 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2739 if ((TREE_CODE (then_clause) == INTEGER_CST
2740 && !int_fits_type_p (then_clause, itype))
2741 || (TREE_CODE (else_clause) == INTEGER_CST
2742 && !int_fits_type_p (else_clause, itype)))
2743 return NULL;
  /* Emit: a_it = cond ? (itype) then : (itype) else; a_T = (TYPE) a_it.  */
2746 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2747 COND_EXPR, unshare_expr (cond_expr),
2748 fold_convert (itype, then_clause),
2749 fold_convert (itype, else_clause));
2750 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2751 NOP_EXPR, gimple_assign_lhs (def_stmt));
2753 new_pattern_def_seq (stmt_vinfo, def_stmt);
2754 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2755 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2756 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2757 *type_in = vecitype;
2758 *type_out = vectype;
2760 if (dump_enabled_p ())
2761 dump_printf_loc (MSG_NOTE, vect_location,
2762 "vect_recog_mixed_size_cond_pattern: detected:\n");
2764 return pattern_stmt;
2768 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2769 true if bool VAR can be optimized that way. */
2771 static bool
2772 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2774 gimple def_stmt;
2775 enum vect_def_type dt;
2776 tree def, rhs1;
2777 enum tree_code rhs_code;
2779 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2780 &dt))
2781 return false;
2783 if (dt != vect_internal_def)
2784 return false;
2786 if (!is_gimple_assign (def_stmt))
2787 return false;
2789 if (!has_single_use (def))
2790 return false;
2792 rhs1 = gimple_assign_rhs1 (def_stmt);
2793 rhs_code = gimple_assign_rhs_code (def_stmt);
2794 switch (rhs_code)
2796 case SSA_NAME:
2797 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2799 CASE_CONVERT:
2800 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2801 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2802 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2803 return false;
2804 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2806 case BIT_NOT_EXPR:
2807 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2809 case BIT_AND_EXPR:
2810 case BIT_IOR_EXPR:
2811 case BIT_XOR_EXPR:
2812 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2813 return false;
2814 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2815 bb_vinfo);
2817 default:
2818 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2820 tree vecitype, comp_vectype;
2822 /* If the comparison can throw, then is_gimple_condexpr will be
2823 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2824 if (stmt_could_throw_p (def_stmt))
2825 return false;
2827 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2828 if (comp_vectype == NULL_TREE)
2829 return false;
2831 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2833 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2834 tree itype
2835 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2836 vecitype = get_vectype_for_scalar_type (itype);
2837 if (vecitype == NULL_TREE)
2838 return false;
2840 else
2841 vecitype = comp_vectype;
2842 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2844 return false;
2849 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2850 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2851 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2853 static tree
2854 adjust_bool_pattern_cast (tree type, tree var)
2856 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2857 gimple cast_stmt, pattern_stmt;
2859 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2860 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2861 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2862 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2863 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2864 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2865 return gimple_assign_lhs (cast_stmt);
2869 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2870 recursively. VAR is an SSA_NAME that should be transformed from bool
2871 to a wider integer type, OUT_TYPE is the desired final integer type of
2872 the whole pattern, TRUEVAL should be NULL unless optimizing
2873 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2874 in the then_clause, STMTS is where statements with added pattern stmts
2875 should be pushed to. */
2877 static tree
2878 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2879 vec<gimple> *stmts)
2881 gimple stmt = SSA_NAME_DEF_STMT (var);
2882 enum tree_code rhs_code, def_rhs_code;
2883 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2884 location_t loc;
2885 gimple pattern_stmt, def_stmt;
  /* check_bool_pattern has already verified STMT is an assignment with
     one of the rhs codes handled below.  */
2887 rhs1 = gimple_assign_rhs1 (stmt);
2888 rhs2 = gimple_assign_rhs2 (stmt);
2889 rhs_code = gimple_assign_rhs_code (stmt);
2890 loc = gimple_location (stmt);
2891 switch (rhs_code)
2893 case SSA_NAME:
2894 CASE_CONVERT:
2895 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2896 itype = TREE_TYPE (irhs1);
2897 pattern_stmt
2898 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2899 SSA_NAME, irhs1);
2900 break;
2902 case BIT_NOT_EXPR:
2903 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2904 itype = TREE_TYPE (irhs1);
2905 pattern_stmt
2906 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2907 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2908 break;
2910 case BIT_AND_EXPR:
2911 /* Try to optimize x = y & (a < b ? 1 : 0); into
2912 x = (a < b ? y : 0);
2914 E.g. for:
2915 bool a_b, b_b, c_b;
2916 TYPE d_T;
2918 S1 a_b = x1 CMP1 y1;
2919 S2 b_b = x2 CMP2 y2;
2920 S3 c_b = a_b & b_b;
2921 S4 d_T = (TYPE) c_b;
2923 we would normally emit:
2925 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2926 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2927 S3' c_T = a_T & b_T;
2928 S4' d_T = c_T;
2930 but we can save one stmt by using the
2931 result of one of the COND_EXPRs in the other COND_EXPR and leave
2932 BIT_AND_EXPR stmt out:
2934 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2935 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2936 S4' f_T = c_T;
2938 At least when VEC_COND_EXPR is implemented using masks
2939 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2940 computes the comparison masks and ands it, in one case with
2941 all ones vector, in the other case with a vector register.
2942 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2943 often more expensive. */
2944 def_stmt = SSA_NAME_DEF_STMT (rhs2)
2945 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2946 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2948 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2949 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2950 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2951 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2953 gimple tstmt;
2954 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2955 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2956 tstmt = stmts->pop ();
2957 gcc_assert (tstmt == def_stmt);
2958 stmts->quick_push (stmt);
2959 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2960 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2961 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2962 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2963 return irhs2;
2965 else
2966 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2967 goto and_ior_xor;
  /* Symmetrically, try folding the COND_EXPR of the first operand's
     comparison into the second's.  */
2969 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2970 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2971 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2973 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2974 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2975 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2976 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2978 gimple tstmt;
2979 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2980 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2981 tstmt = stmts->pop ();
2982 gcc_assert (tstmt == def_stmt);
2983 stmts->quick_push (stmt);
2984 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2985 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2986 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2987 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2988 return irhs1;
2990 else
2991 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2992 goto and_ior_xor;
2994 /* FALLTHRU */
2995 case BIT_IOR_EXPR:
2996 case BIT_XOR_EXPR:
2997 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2998 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2999 and_ior_xor:
  /* If the adjusted operands ended up in different precisions, cast the
     one further from OUT_TYPE's precision (or both, on a tie).  */
3000 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3001 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3003 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3004 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3005 int out_prec = TYPE_PRECISION (out_type);
3006 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3007 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3008 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3009 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3010 else
3012 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3013 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3016 itype = TREE_TYPE (irhs1);
3017 pattern_stmt
3018 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3019 rhs_code, irhs1, irhs2);
3020 break;
3022 default:
  /* A comparison: materialize it as COND_EXPR with 1/0 (or TRUEVAL)
     results in an unsigned type filling the operand's mode.  */
3023 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3024 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3025 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3026 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3027 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3029 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3030 itype
3031 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3033 else
3034 itype = TREE_TYPE (rhs1);
3035 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3036 if (trueval == NULL_TREE)
3037 trueval = build_int_cst (itype, 1);
3038 else
3039 gcc_checking_assert (useless_type_conversion_p (itype,
3040 TREE_TYPE (trueval)));
3041 pattern_stmt
3042 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3043 COND_EXPR, cond_expr, trueval,
3044 build_int_cst (itype, 0));
3045 break;
3048 stmts->safe_push (stmt);
3049 gimple_set_location (pattern_stmt, loc);
3050 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3051 return gimple_assign_lhs (pattern_stmt);
3055 /* Function vect_recog_bool_pattern
3057 Try to find pattern like following:
3059 bool a_b, b_b, c_b, d_b, e_b;
3060 TYPE f_T;
3061 loop:
3062 S1 a_b = x1 CMP1 y1;
3063 S2 b_b = x2 CMP2 y2;
3064 S3 c_b = a_b & b_b;
3065 S4 d_b = x3 CMP3 y3;
3066 S5 e_b = c_b | d_b;
3067 S6 f_T = (TYPE) e_b;
3069 where type 'TYPE' is an integral type. Or a similar pattern
3070 ending in
3072 S6 f_Y = e_b ? r_Y : s_Y;
3074 as results from if-conversion of a complex condition.
3076 Input:
3078 * LAST_STMT: A stmt at the end from which the pattern
3079 search begins, i.e. cast of a bool to
3080 an integer type.
3082 Output:
3084 * TYPE_IN: The type of the input arguments to the pattern.
3086 * TYPE_OUT: The type of the output of this pattern.
3088 * Return value: A new stmt that will be used to replace the pattern.
3090 Assuming size of TYPE is the same as size of all comparisons
3091 (otherwise some casts would be added where needed), the above
3092 sequence we create related pattern stmts:
3093 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3094 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3095 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3096 S5' e_T = c_T | d_T;
3097 S6' f_T = e_T;
3099 Instead of the above S3' we could emit:
3100 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3101 S3' c_T = a_T | b_T;
3102 but the above is more efficient. */
3104 static gimple
3105 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3106 tree *type_out)
3108 gimple last_stmt = stmts->pop ();
3109 enum tree_code rhs_code;
3110 tree var, lhs, rhs, vectype;
3111 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3113 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3114 gimple pattern_stmt;
3116 if (!is_gimple_assign (last_stmt))
3117 return NULL;
3119 var = gimple_assign_rhs1 (last_stmt);
3120 lhs = gimple_assign_lhs (last_stmt);
3122 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3123 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3124 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3125 return NULL;
3127 rhs_code = gimple_assign_rhs_code (last_stmt);
3128 if (CONVERT_EXPR_CODE_P (rhs_code))
3130 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3131 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3132 return NULL;
3133 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3134 if (vectype == NULL_TREE)
3135 return NULL;
3137 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3138 return NULL;
3140 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3141 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3142 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3143 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3144 else
3145 pattern_stmt
3146 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3147 *type_out = vectype;
3148 *type_in = vectype;
3149 stmts->safe_push (last_stmt);
3150 if (dump_enabled_p ())
3151 dump_printf_loc (MSG_NOTE, vect_location,
3152 "vect_recog_bool_pattern: detected:\n");
3154 return pattern_stmt;
3156 else if (rhs_code == COND_EXPR
3157 && TREE_CODE (var) == SSA_NAME)
3159 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3160 if (vectype == NULL_TREE)
3161 return NULL;
3163 /* Build a scalar type for the boolean result that when
3164 vectorized matches the vector type of the result in
3165 size and number of elements. */
3166 unsigned prec
3167 = wi::udiv_trunc (TYPE_SIZE (vectype),
3168 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3169 tree type
3170 = build_nonstandard_integer_type (prec,
3171 TYPE_UNSIGNED (TREE_TYPE (var)));
3172 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3173 return NULL;
3175 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3176 return NULL;
3178 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3179 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3180 pattern_stmt
3181 = gimple_build_assign (lhs, COND_EXPR,
3182 build2 (NE_EXPR, boolean_type_node,
3183 rhs, build_int_cst (type, 0)),
3184 gimple_assign_rhs2 (last_stmt),
3185 gimple_assign_rhs3 (last_stmt));
3186 *type_out = vectype;
3187 *type_in = vectype;
3188 stmts->safe_push (last_stmt);
3189 if (dump_enabled_p ())
3190 dump_printf_loc (MSG_NOTE, vect_location,
3191 "vect_recog_bool_pattern: detected:\n");
3193 return pattern_stmt;
3195 else if (rhs_code == SSA_NAME
3196 && STMT_VINFO_DATA_REF (stmt_vinfo))
3198 stmt_vec_info pattern_stmt_info;
3199 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3200 gcc_assert (vectype != NULL_TREE);
3201 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3202 return NULL;
3203 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3204 return NULL;
3206 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3207 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3208 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3210 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3211 gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3212 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3213 rhs = rhs2;
3215 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3216 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3217 bb_vinfo);
3218 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3219 STMT_VINFO_DATA_REF (pattern_stmt_info)
3220 = STMT_VINFO_DATA_REF (stmt_vinfo);
3221 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3222 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3223 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3224 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3225 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3226 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3227 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3228 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3229 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3230 *type_out = vectype;
3231 *type_in = vectype;
3232 stmts->safe_push (last_stmt);
3233 if (dump_enabled_p ())
3234 dump_printf_loc (MSG_NOTE, vect_location,
3235 "vect_recog_bool_pattern: detected:\n");
3236 return pattern_stmt;
3238 else
3239 return NULL;
3243 /* Mark statements that are involved in a pattern. */
3245 static inline void
3246 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3247 tree pattern_vectype)
3249 stmt_vec_info pattern_stmt_info, def_stmt_info;
3250 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3251 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3253 gimple def_stmt;
3255 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3256 if (pattern_stmt_info == NULL)
3258 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3259 bb_vinfo);
3260 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3262 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3264 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3265 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3266 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3267 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3268 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3269 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3270 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3271 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3272 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3274 gimple_stmt_iterator si;
3275 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3276 !gsi_end_p (si); gsi_next (&si))
3278 def_stmt = gsi_stmt (si);
3279 def_stmt_info = vinfo_for_stmt (def_stmt);
3280 if (def_stmt_info == NULL)
3282 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3283 bb_vinfo);
3284 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3286 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3287 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3288 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3289 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3290 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3295 /* Function vect_pattern_recog_1
3297 Input:
3298 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3299 computation pattern.
3300 STMT: A stmt from which the pattern search should start.
3302 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3303 expression that computes the same functionality and can be used to
3304 replace the sequence of stmts that are involved in the pattern.
3306 Output:
3307 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3308 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3309 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3310 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3311 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3312 to the available target pattern.
3314 This function also does some bookkeeping, as explained in the documentation
3315 for vect_recog_pattern. */
3317 static void
3318 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3319 gimple_stmt_iterator si,
3320 vec<gimple> *stmts_to_replace)
3322 gimple stmt = gsi_stmt (si), pattern_stmt;
3323 stmt_vec_info stmt_info;
3324 loop_vec_info loop_vinfo;
3325 tree pattern_vectype;
3326 tree type_in, type_out;
3327 enum tree_code code;
3328 int i;
3329 gimple next;
3331 stmts_to_replace->truncate (0);
3332 stmts_to_replace->quick_push (stmt);
3333 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3334 if (!pattern_stmt)
3335 return;
3337 stmt = stmts_to_replace->last ();
3338 stmt_info = vinfo_for_stmt (stmt);
3339 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3341 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3343 /* No need to check target support (already checked by the pattern
3344 recognition function). */
3345 pattern_vectype = type_out ? type_out : type_in;
3347 else
3349 machine_mode vec_mode;
3350 enum insn_code icode;
3351 optab optab;
3353 /* Check target support */
3354 type_in = get_vectype_for_scalar_type (type_in);
3355 if (!type_in)
3356 return;
3357 if (type_out)
3358 type_out = get_vectype_for_scalar_type (type_out);
3359 else
3360 type_out = type_in;
3361 if (!type_out)
3362 return;
3363 pattern_vectype = type_out;
3365 if (is_gimple_assign (pattern_stmt))
3366 code = gimple_assign_rhs_code (pattern_stmt);
3367 else
3369 gcc_assert (is_gimple_call (pattern_stmt));
3370 code = CALL_EXPR;
3373 optab = optab_for_tree_code (code, type_in, optab_default);
3374 vec_mode = TYPE_MODE (type_in);
3375 if (!optab
3376 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3377 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3378 return;
3381 /* Found a vectorizable pattern. */
3382 if (dump_enabled_p ())
3384 dump_printf_loc (MSG_NOTE, vect_location,
3385 "pattern recognized: ");
3386 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3389 /* Mark the stmts that are involved in the pattern. */
3390 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3392 /* Patterns cannot be vectorized using SLP, because they change the order of
3393 computation. */
3394 if (loop_vinfo)
3395 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3396 if (next == stmt)
3397 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3399 /* It is possible that additional pattern stmts are created and inserted in
3400 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3401 relevant statements. */
3402 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3403 && (unsigned) i < (stmts_to_replace->length () - 1);
3404 i++)
3406 stmt_info = vinfo_for_stmt (stmt);
3407 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3408 if (dump_enabled_p ())
3410 dump_printf_loc (MSG_NOTE, vect_location,
3411 "additional pattern stmt: ");
3412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3415 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         > VS6: va_new = ....           -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
         > VS5: ... = ..vuse(va_new)..  -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
3497 void
3498 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3500 struct loop *loop;
3501 basic_block *bbs;
3502 unsigned int nbbs;
3503 gimple_stmt_iterator si;
3504 unsigned int i, j;
3505 vect_recog_func_ptr vect_recog_func;
3506 auto_vec<gimple, 1> stmts_to_replace;
3507 gimple stmt;
3509 if (dump_enabled_p ())
3510 dump_printf_loc (MSG_NOTE, vect_location,
3511 "=== vect_pattern_recog ===\n");
3513 if (loop_vinfo)
3515 loop = LOOP_VINFO_LOOP (loop_vinfo);
3516 bbs = LOOP_VINFO_BBS (loop_vinfo);
3517 nbbs = loop->num_nodes;
3519 else
3521 bbs = &BB_VINFO_BB (bb_vinfo);
3522 nbbs = 1;
3525 /* Scan through the loop stmts, applying the pattern recognition
3526 functions starting at each stmt visited: */
3527 for (i = 0; i < nbbs; i++)
3529 basic_block bb = bbs[i];
3530 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3532 if (bb_vinfo && (stmt = gsi_stmt (si))
3533 && vinfo_for_stmt (stmt)
3534 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3535 continue;
3537 /* Scan over all generic vect_recog_xxx_pattern functions. */
3538 for (j = 0; j < NUM_PATTERNS; j++)
3540 vect_recog_func = vect_vect_recog_func_ptrs[j];
3541 vect_pattern_recog_1 (vect_recog_func, si,
3542 &stmts_to_replace);