gcc/tree-vect-patterns.c

/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "dominance.h"
#include "gimple-pretty-print.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "insn-codes.h"
#include "optabs-tree.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"

/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mult_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
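
/* For illustration, a C source loop of the shape this function looks for
   (a hypothetical sketch, assuming 16-bit inputs and a 32-bit accumulator;
   the usual arithmetic conversions supply the S3/S4 promotions, the
   product is S5 and the accumulation is S7):

     int
     dot_prod (short *x, short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += x[i] * y[i];
       return sum;
     }

   The whole reduction can then be replaced by one DOT_PROD_EXPR per
   vector iteration.  */
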
static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
	return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
	{
	  stmt = def_stmt;
	  oprnd0 = gimple_assign_rhs1 (stmt);
	}
      else
	half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
	return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
	return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
	return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
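
/* For illustration, a C source loop of the shape this function looks for
   (a hypothetical sketch, assuming unsigned char inputs and an int
   accumulator; the subtraction happens in int after promotion, supplying
   S3-S5, abs () supplies S6 and the accumulation is S8):

     #include <stdlib.h>

     int
     sad (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += abs (x[i] - y[i]);
       return sum;
     }
*/
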
static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
	return NULL;

      /* The type conversion could be promotion, demotion,
	 or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
			     &half_type, &def_stmt, &promotion))
	plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
	half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a loop-header
     phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
			  &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
			  &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
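
/* For illustration, a hypothetical sketch of the intermediate-type case:
   with c of type char, TYPE == int is 4 times bigger than HALF_TYPE ==
   char, and a shift count of 12 does not fit char's 8-bit precision, but
   does fit a 16-bit intermediate type, so the widening can be done from
   that intermediate type instead:

     int r = (int) c << 12;

   Here *WSTMT would become the conversion c_it = (short) c (c_it being a
   hypothetical name), and the caller widens from short to int.  */
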
static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
	   '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
	   '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
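
/* For illustration, a C source loop of the shape described above
   (a hypothetical sketch; the promotions of x[i] and y[i] to int are
   S3/S4 and the multiplication is S5):

     void
     widen_mult (short *x, short *y, int *prod, int n)
     {
       for (int i = 0; i < n; i++)
	 prod[i] = x[i] * y[i];
     }
*/
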
static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
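
/* For illustration, a hypothetical sketch: both statements below call pow
   with a constant exponent, so the first is replaced by a multiplication
   and the second by a sqrt call (assuming the target can vectorize sqrt):

     #include <math.h>

     void
     f (double *a, double *b, int n)
     {
       for (int i = 0; i < n; i++)
	 {
	   a[i] = pow (b[i], 2.0);	 becomes b[i] * b[i]
	   b[i] = pow (a[i], 0.5);	 becomes sqrt (a[i])
	 }
     }
*/
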
static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
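
/* For illustration, a C source loop of the shape described above
   (a hypothetical sketch; the promotion of x[i] to int is S2 and the
   accumulation is S3):

     int
     widen_sum (short *x, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += x[i];
       return sum;
     }
*/
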
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be created
	 in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
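
/* For illustration, a hypothetical sketch: the arithmetic below is
   performed in int only because of C's integer promotions, and the result
   is immediately demoted on the store, so S3/S4 can be done in a type
   narrower than int:

     void
     f (unsigned char *p, unsigned char *q, int n)
     {
       for (int i = 0; i < n; i++)
	 q[i] = (p[i] << 1) | 1;
     }
*/
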
static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
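
/* For illustration, a hypothetical sketch: in[i] is promoted before the
   shift and the result is stored as unsigned short, so the shift can be
   done as a single widening shift from unsigned char to unsigned short:

     void
     f (unsigned char *in, unsigned short *out, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = (unsigned short) (in[i] << 4);
     }
*/
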
static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

  where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
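
/* For illustration, a hypothetical sketch: the shift/OR idiom below is
   commonly folded to an 8-bit rotate earlier in the pipeline, and if the
   target has no vector rotate instruction this function lowers it back
   to the S1-S4/S0 shift sequence so it can still be vectorized:

     void
     f (unsigned char *a, unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = (unsigned char) ((b[i] << 3) | (b[i] >> 5));
     }
*/
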
static gimple *
vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && is_a <loop_vec_info> (vinfo))
    {
      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1974 append_pattern_def_seq (stmt_vinfo, def_stmt);
1978 var1 = vect_recog_temp_ssa_var (type, NULL);
1979 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1980 ? LSHIFT_EXPR : RSHIFT_EXPR,
1981 oprnd0, def);
1982 append_pattern_def_seq (stmt_vinfo, def_stmt);
1984 var2 = vect_recog_temp_ssa_var (type, NULL);
1985 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1986 ? RSHIFT_EXPR : LSHIFT_EXPR,
1987 oprnd0, def2);
1988 append_pattern_def_seq (stmt_vinfo, def_stmt);
1990 /* Pattern detected. */
1991 if (dump_enabled_p ())
1992 dump_printf_loc (MSG_NOTE, vect_location,
1993 "vect_recog_rotate_pattern: detected:\n");
1995 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1996 var = vect_recog_temp_ssa_var (type, NULL);
1997 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1999 if (dump_enabled_p ())
2000 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2002 stmts->safe_push (last_stmt);
2003 return pattern_stmt;
2006 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2007 vectorized:
2009 type a_t;
2010 TYPE b_T, res_T;
2012 S1 a_t = ;
2013 S2 b_T = ;
2014 S3 res_T = b_T op a_t;
2016 where type 'TYPE' is a type with different size than 'type',
2017 and op is <<, >> or rotate.
2019 Also detect cases:
2021 type a_t;
2022 TYPE b_T, c_T, res_T;
2024 S0 c_T = ;
2025 S1 a_t = (type) c_T;
2026 S2 b_T = ;
2027 S3 res_T = b_T op a_t;
2029 Input/Output:
2031 * STMTS: Contains a stmt from which the pattern search begins,
2032 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2033 with a shift/rotate which has same type on both operands, in the
2034 second case just b_T op c_T, in the first case with added cast
2035 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2037 Output:
2039 * TYPE_IN: The type of the input arguments to the pattern.
2041 * TYPE_OUT: The type of the output of this pattern.
2043 * Return value: A new stmt that will be used to replace the shift/rotate
2044 S3 stmt. */
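/* Illustrative sketch, assuming 32-bit 'int' and 64-bit 'long long':

     long long b, c, res;
     int a;
     a = (int) c;        S1
     res = b << a;       S3

   is rewritten to shift by the original wide value, res = b << c, so
   that both operands of the vector shift share one vector type; when
   no such cast is found, a NOP_EXPR widening 'a' back to 'long long'
   is emitted into the pattern def sequence instead.  */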
2046 static gimple *
2047 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2048 tree *type_in, tree *type_out)
2050 gimple *last_stmt = stmts->pop ();
2051 tree oprnd0, oprnd1, lhs, var;
2052 gimple *pattern_stmt, *def_stmt;
2053 enum tree_code rhs_code;
2054 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2055 vec_info *vinfo = stmt_vinfo->vinfo;
2056 enum vect_def_type dt;
2058 if (!is_gimple_assign (last_stmt))
2059 return NULL;
2061 rhs_code = gimple_assign_rhs_code (last_stmt);
2062 switch (rhs_code)
2064 case LSHIFT_EXPR:
2065 case RSHIFT_EXPR:
2066 case LROTATE_EXPR:
2067 case RROTATE_EXPR:
2068 break;
2069 default:
2070 return NULL;
2073 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2074 return NULL;
2076 lhs = gimple_assign_lhs (last_stmt);
2077 oprnd0 = gimple_assign_rhs1 (last_stmt);
2078 oprnd1 = gimple_assign_rhs2 (last_stmt);
2079 if (TREE_CODE (oprnd0) != SSA_NAME
2080 || TREE_CODE (oprnd1) != SSA_NAME
2081 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2082 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2083 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2084 || TYPE_PRECISION (TREE_TYPE (lhs))
2085 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2086 return NULL;
2088 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2089 return NULL;
2091 if (dt != vect_internal_def)
2092 return NULL;
2094 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2095 *type_out = *type_in;
2096 if (*type_in == NULL_TREE)
2097 return NULL;
2099 tree def = NULL_TREE;
2100 if (gimple_assign_cast_p (def_stmt))
2102 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2103 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2104 && TYPE_PRECISION (TREE_TYPE (rhs1))
2105 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2106 def = rhs1;
2109 if (def == NULL_TREE)
2111 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2112 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2113 new_pattern_def_seq (stmt_vinfo, def_stmt);
2116 /* Pattern detected. */
2117 if (dump_enabled_p ())
2118 dump_printf_loc (MSG_NOTE, vect_location,
2119 "vect_recog_vector_vector_shift_pattern: detected:\n");
2121 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2122 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2123 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2125 if (dump_enabled_p ())
2126 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2128 stmts->safe_push (last_stmt);
2129 return pattern_stmt;
2132 /* Detect multiplication by a constant that is a positive or negative power of 2,
2133 and convert it to a shift pattern.
2135 Mult with a constant that is a positive power of two.
2136 type a_t;
2137 type b_t;
2138 S1: b_t = a_t * n
2142 Mult with a constant that is a negative power of two.
2143 S2: b_t = a_t * -n
2145 Input/Output:
2147 STMTS: Contains a stmt from which the pattern search begins,
2148 i.e. the mult stmt. Convert the mult operation to LSHIFT if the
2149 constant operand is a power of 2.
2150 type a_t, b_t;
2151 S1': b_t = a_t << log2 (n)
2153 Convert the mult operation to LSHIFT followed by a NEGATE
2154 if the constant operand is a negative power of 2.
2155 type a_t, b_t, res_T;
2156 S2': b_t = a_t << log2 (n)
2157 S3': res_T = - (b_t)
2159 Output:
2161 * TYPE_IN: The type of the input arguments to the pattern.
2163 * TYPE_OUT: The type of the output of this pattern.
2165 * Return value: A new stmt that will be used to replace the multiplication
2166 S1 or S2 stmt. */
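/* Concrete instances (a sketch, assuming the target has vector shifts
   and vector negation but no vector multiply):

     b_t = a_t * 8;     becomes   b_t = a_t << 3;
     b_t = a_t * -8;    becomes   tmp = a_t << 3;  b_t = -tmp;

   Constants that are not exact powers of two (nor negatives thereof)
   are rejected below, since wi::exact_log2 returns -1 for them.  */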
2168 static gimple *
2169 vect_recog_mult_pattern (vec<gimple *> *stmts,
2170 tree *type_in, tree *type_out)
2172 gimple *last_stmt = stmts->pop ();
2173 tree oprnd0, oprnd1, vectype, itype;
2174 gimple *pattern_stmt, *def_stmt;
2175 optab optab;
2176 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2177 int power2_val, power2_neg_val;
2178 tree shift;
2180 if (!is_gimple_assign (last_stmt))
2181 return NULL;
2183 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2184 return NULL;
2186 oprnd0 = gimple_assign_rhs1 (last_stmt);
2187 oprnd1 = gimple_assign_rhs2 (last_stmt);
2188 itype = TREE_TYPE (oprnd0);
2190 if (TREE_CODE (oprnd0) != SSA_NAME
2191 || TREE_CODE (oprnd1) != INTEGER_CST
2192 || !INTEGRAL_TYPE_P (itype)
2193 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2194 return NULL;
2196 vectype = get_vectype_for_scalar_type (itype);
2197 if (vectype == NULL_TREE)
2198 return NULL;
2200 /* If the target can handle vectorized multiplication natively,
2201 don't attempt to optimize this. */
2202 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2203 if (optab != unknown_optab)
2205 machine_mode vec_mode = TYPE_MODE (vectype);
2206 int icode = (int) optab_handler (optab, vec_mode);
2207 if (icode != CODE_FOR_nothing)
2208 return NULL;
2211 /* If target cannot handle vector left shift then we cannot
2212 optimize and bail out. */
2213 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2214 if (!optab
2215 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2216 return NULL;
2218 power2_val = wi::exact_log2 (oprnd1);
2219 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2221 /* Handle constant operands that are positive or negative powers of 2. */
2222 if (power2_val != -1)
2224 shift = build_int_cst (itype, power2_val);
2225 pattern_stmt
2226 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2227 LSHIFT_EXPR, oprnd0, shift);
2229 else if (power2_neg_val != -1)
2231 /* If the target cannot handle vector NEGATE then we cannot
2232 do the optimization. */
2233 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2234 if (!optab
2235 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2236 return NULL;
2238 shift = build_int_cst (itype, power2_neg_val);
2239 def_stmt
2240 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2241 LSHIFT_EXPR, oprnd0, shift);
2242 new_pattern_def_seq (stmt_vinfo, def_stmt);
2243 pattern_stmt
2244 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2245 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2247 else
2248 return NULL;
2250 /* Pattern detected. */
2251 if (dump_enabled_p ())
2252 dump_printf_loc (MSG_NOTE, vect_location,
2253 "vect_recog_mult_pattern: detected:\n");
2255 if (dump_enabled_p ())
2256 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2257 pattern_stmt, 0);
2259 stmts->safe_push (last_stmt);
2260 *type_in = vectype;
2261 *type_out = vectype;
2263 return pattern_stmt;
2266 /* Detect a signed division by a constant that wouldn't be
2267 otherwise vectorized:
2269 type a_t, b_t;
2271 S1 a_t = b_t / N;
2273 where type 'type' is an integral type and N is a constant.
2275 Similarly handle modulo by a constant:
2277 S4 a_t = b_t % N;
2279 Input/Output:
2281 * STMTS: Contains a stmt from which the pattern search begins,
2282 i.e. the division stmt. S1 is replaced, if N is a power of two
2283 constant and the type is signed, by:
2284 S3 y_t = b_t < 0 ? N - 1 : 0;
2285 S2 x_t = b_t + y_t;
2286 S1' a_t = x_t >> log2 (N);
2288 S4 is replaced, if N is a power of two constant and
2289 the type is signed, by (where *_T temporaries have unsigned type):
2290 S9 y_T = b_t < 0 ? -1U : 0U;
2291 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2292 S7 z_t = (type) z_T;
2293 S6 w_t = b_t + z_t;
2294 S5 x_t = w_t & (N - 1);
2295 S4' a_t = x_t - z_t;
2297 Output:
2299 * TYPE_IN: The type of the input arguments to the pattern.
2301 * TYPE_OUT: The type of the output of this pattern.
2303 * Return value: A new stmt that will be used to replace the division
2304 S1 or modulo S4 stmt. */
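/* Worked instance of the power-of-two case (illustrative, for signed
   32-bit b_t and N == 4):

     a_t = b_t / 4   =>   y_t = b_t < 0 ? 3 : 0;
                          x_t = b_t + y_t;
                          a_t = x_t >> 2;

   e.g. b_t == -5 gives y_t == 3, x_t == -2, a_t == -1 == trunc (-5/4).

     a_t = b_t % 4   =>   y_T = b_t < 0 ? -1U : 0U;
                          z_T = y_T >> 30;  z_t = (type) z_T;
                          w_t = b_t + z_t;  x_t = w_t & 3;
                          a_t = x_t - z_t;

   e.g. b_t == -5 gives z_t == 3, w_t == -2, x_t == 2, a_t == -1.  */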
2306 static gimple *
2307 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2308 tree *type_in, tree *type_out)
2310 gimple *last_stmt = stmts->pop ();
2311 tree oprnd0, oprnd1, vectype, itype, cond;
2312 gimple *pattern_stmt, *def_stmt;
2313 enum tree_code rhs_code;
2314 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2315 vec_info *vinfo = stmt_vinfo->vinfo;
2316 optab optab;
2317 tree q;
2318 int dummy_int, prec;
2319 stmt_vec_info def_stmt_vinfo;
2321 if (!is_gimple_assign (last_stmt))
2322 return NULL;
2324 rhs_code = gimple_assign_rhs_code (last_stmt);
2325 switch (rhs_code)
2327 case TRUNC_DIV_EXPR:
2328 case TRUNC_MOD_EXPR:
2329 break;
2330 default:
2331 return NULL;
2334 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2335 return NULL;
2337 oprnd0 = gimple_assign_rhs1 (last_stmt);
2338 oprnd1 = gimple_assign_rhs2 (last_stmt);
2339 itype = TREE_TYPE (oprnd0);
2340 if (TREE_CODE (oprnd0) != SSA_NAME
2341 || TREE_CODE (oprnd1) != INTEGER_CST
2342 || TREE_CODE (itype) != INTEGER_TYPE
2343 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2344 return NULL;
2346 vectype = get_vectype_for_scalar_type (itype);
2347 if (vectype == NULL_TREE)
2348 return NULL;
2350 /* If the target can handle vectorized division or modulo natively,
2351 don't attempt to optimize this. */
2352 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2353 if (optab != unknown_optab)
2355 machine_mode vec_mode = TYPE_MODE (vectype);
2356 int icode = (int) optab_handler (optab, vec_mode);
2357 if (icode != CODE_FOR_nothing)
2358 return NULL;
2361 prec = TYPE_PRECISION (itype);
2362 if (integer_pow2p (oprnd1))
2364 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2365 return NULL;
2367 /* Pattern detected. */
2368 if (dump_enabled_p ())
2369 dump_printf_loc (MSG_NOTE, vect_location,
2370 "vect_recog_divmod_pattern: detected:\n");
2372 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2373 build_int_cst (itype, 0));
2374 if (rhs_code == TRUNC_DIV_EXPR)
2376 tree var = vect_recog_temp_ssa_var (itype, NULL);
2377 tree shift;
2378 def_stmt
2379 = gimple_build_assign (var, COND_EXPR, cond,
2380 fold_build2 (MINUS_EXPR, itype, oprnd1,
2381 build_int_cst (itype, 1)),
2382 build_int_cst (itype, 0));
2383 new_pattern_def_seq (stmt_vinfo, def_stmt);
2384 var = vect_recog_temp_ssa_var (itype, NULL);
2385 def_stmt
2386 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2387 gimple_assign_lhs (def_stmt));
2388 append_pattern_def_seq (stmt_vinfo, def_stmt);
2390 shift = build_int_cst (itype, tree_log2 (oprnd1));
2391 pattern_stmt
2392 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2393 RSHIFT_EXPR, var, shift);
2395 else
2397 tree signmask;
2398 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2399 if (compare_tree_int (oprnd1, 2) == 0)
2401 signmask = vect_recog_temp_ssa_var (itype, NULL);
2402 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2403 build_int_cst (itype, 1),
2404 build_int_cst (itype, 0));
2405 append_pattern_def_seq (stmt_vinfo, def_stmt);
2407 else
2409 tree utype
2410 = build_nonstandard_integer_type (prec, 1);
2411 tree vecutype = get_vectype_for_scalar_type (utype);
2412 tree shift
2413 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2414 - tree_log2 (oprnd1));
2415 tree var = vect_recog_temp_ssa_var (utype, NULL);
2417 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2418 build_int_cst (utype, -1),
2419 build_int_cst (utype, 0));
2420 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2421 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2422 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2423 append_pattern_def_seq (stmt_vinfo, def_stmt);
2424 var = vect_recog_temp_ssa_var (utype, NULL);
2425 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2426 gimple_assign_lhs (def_stmt),
2427 shift);
2428 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2429 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2430 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2431 append_pattern_def_seq (stmt_vinfo, def_stmt);
2432 signmask = vect_recog_temp_ssa_var (itype, NULL);
2433 def_stmt
2434 = gimple_build_assign (signmask, NOP_EXPR, var);
2435 append_pattern_def_seq (stmt_vinfo, def_stmt);
2437 def_stmt
2438 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2439 PLUS_EXPR, oprnd0, signmask);
2440 append_pattern_def_seq (stmt_vinfo, def_stmt);
2441 def_stmt
2442 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2443 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2444 fold_build2 (MINUS_EXPR, itype, oprnd1,
2445 build_int_cst (itype, 1)));
2446 append_pattern_def_seq (stmt_vinfo, def_stmt);
2448 pattern_stmt
2449 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2450 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2451 signmask);
2454 if (dump_enabled_p ())
2455 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2458 stmts->safe_push (last_stmt);
2460 *type_in = vectype;
2461 *type_out = vectype;
2462 return pattern_stmt;
2465 if (prec > HOST_BITS_PER_WIDE_INT
2466 || integer_zerop (oprnd1))
2467 return NULL;
2469 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2470 return NULL;
2472 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2474 if (TYPE_UNSIGNED (itype))
2476 unsigned HOST_WIDE_INT mh, ml;
2477 int pre_shift, post_shift;
2478 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2479 & GET_MODE_MASK (TYPE_MODE (itype)));
2480 tree t1, t2, t3, t4;
2482 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2483 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2484 return NULL;
2486 /* Find a suitable multiplier and right shift count
2487 instead of multiplying with D. */
2488 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2490 /* If the suggested multiplier is more than SIZE bits, we can do better
2491 for even divisors, using an initial right shift. */
2492 if (mh != 0 && (d & 1) == 0)
2494 pre_shift = floor_log2 (d & -d);
2495 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2496 &ml, &post_shift, &dummy_int);
2497 gcc_assert (!mh);
2499 else
2500 pre_shift = 0;
2502 if (mh != 0)
2504 if (post_shift - 1 >= prec)
2505 return NULL;
2507 /* t1 = oprnd0 h* ml;
2508 t2 = oprnd0 - t1;
2509 t3 = t2 >> 1;
2510 t4 = t1 + t3;
2511 q = t4 >> (post_shift - 1); */
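/* E.g. for unsigned 32-bit division by 7 the classic
   Granlund-Montgomery / Hacker's Delight constants (shown here for
   illustration only) are ml == 0x24924925 and post_shift == 3,
   giving

     t1 = n h* 0x24924925;
     q  = (((n - t1) >> 1) + t1) >> 2;

   the add-back of t1 recovers the 33rd multiplier bit that does not
   fit into the 32-bit constant.  */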
2512 t1 = vect_recog_temp_ssa_var (itype, NULL);
2513 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2514 build_int_cst (itype, ml));
2515 append_pattern_def_seq (stmt_vinfo, def_stmt);
2517 t2 = vect_recog_temp_ssa_var (itype, NULL);
2518 def_stmt
2519 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2520 append_pattern_def_seq (stmt_vinfo, def_stmt);
2522 t3 = vect_recog_temp_ssa_var (itype, NULL);
2523 def_stmt
2524 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2525 append_pattern_def_seq (stmt_vinfo, def_stmt);
2527 t4 = vect_recog_temp_ssa_var (itype, NULL);
2528 def_stmt
2529 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2531 if (post_shift != 1)
2533 append_pattern_def_seq (stmt_vinfo, def_stmt);
2535 q = vect_recog_temp_ssa_var (itype, NULL);
2536 pattern_stmt
2537 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2538 build_int_cst (itype, post_shift - 1));
2540 else
2542 q = t4;
2543 pattern_stmt = def_stmt;
2546 else
2548 if (pre_shift >= prec || post_shift >= prec)
2549 return NULL;
2551 /* t1 = oprnd0 >> pre_shift;
2552 t2 = t1 h* ml;
2553 q = t2 >> post_shift; */
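/* E.g. for unsigned 32-bit division by 14 one valid combination
   (derived by hand from the Granlund-Montgomery conditions;
   illustrative only, not checked against choose_multiplier's exact
   output) is pre_shift == 1, ml == 0x92492493, post_shift == 2:

     t1 = n >> 1;
     q  = (t1 h* 0x92492493) >> 2;  */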
2554 if (pre_shift)
2556 t1 = vect_recog_temp_ssa_var (itype, NULL);
2557 def_stmt
2558 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2559 build_int_cst (NULL, pre_shift));
2560 append_pattern_def_seq (stmt_vinfo, def_stmt);
2562 else
2563 t1 = oprnd0;
2565 t2 = vect_recog_temp_ssa_var (itype, NULL);
2566 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2567 build_int_cst (itype, ml));
2569 if (post_shift)
2571 append_pattern_def_seq (stmt_vinfo, def_stmt);
2573 q = vect_recog_temp_ssa_var (itype, NULL);
2574 def_stmt
2575 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2576 build_int_cst (itype, post_shift));
2578 else
2579 q = t2;
2581 pattern_stmt = def_stmt;
2584 else
2586 unsigned HOST_WIDE_INT ml;
2587 int post_shift;
2588 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2589 unsigned HOST_WIDE_INT abs_d;
2590 bool add = false;
2591 tree t1, t2, t3, t4;
2593 /* Give up for -1. */
2594 if (d == -1)
2595 return NULL;
2597 /* Since d might be INT_MIN, we have to cast to
2598 unsigned HOST_WIDE_INT before negating to avoid
2599 undefined signed overflow. */
2600 abs_d = (d >= 0
2601 ? (unsigned HOST_WIDE_INT) d
2602 : - (unsigned HOST_WIDE_INT) d);
2604 /* n rem d = n rem -d */
2605 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2607 d = abs_d;
2608 oprnd1 = build_int_cst (itype, abs_d);
2610 else if (HOST_BITS_PER_WIDE_INT >= prec
2611 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2612 /* This case is not handled correctly below. */
2613 return NULL;
2615 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2616 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2618 add = true;
2619 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2621 if (post_shift >= prec)
2622 return NULL;
2624 /* t1 = oprnd0 h* ml; */
2625 t1 = vect_recog_temp_ssa_var (itype, NULL);
2626 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2627 build_int_cst (itype, ml));
2629 if (add)
2631 /* t2 = t1 + oprnd0; */
2632 append_pattern_def_seq (stmt_vinfo, def_stmt);
2633 t2 = vect_recog_temp_ssa_var (itype, NULL);
2634 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2636 else
2637 t2 = t1;
2639 if (post_shift)
2641 /* t3 = t2 >> post_shift; */
2642 append_pattern_def_seq (stmt_vinfo, def_stmt);
2643 t3 = vect_recog_temp_ssa_var (itype, NULL);
2644 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2645 build_int_cst (itype, post_shift));
2647 else
2648 t3 = t2;
2650 wide_int oprnd0_min, oprnd0_max;
2651 int msb = 1;
2652 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2654 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2655 msb = 0;
2656 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2657 msb = -1;
2660 if (msb == 0 && d >= 0)
2662 /* q = t3; */
2663 q = t3;
2664 pattern_stmt = def_stmt;
2666 else
2668 /* t4 = oprnd0 >> (prec - 1);
2669 or if we know from VRP that oprnd0 >= 0
2670 t4 = 0;
2671 or if we know from VRP that oprnd0 < 0
2672 t4 = -1; */
2673 append_pattern_def_seq (stmt_vinfo, def_stmt);
2674 t4 = vect_recog_temp_ssa_var (itype, NULL);
2675 if (msb != 1)
2676 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2677 build_int_cst (itype, msb));
2678 else
2679 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2680 build_int_cst (itype, prec - 1));
2681 append_pattern_def_seq (stmt_vinfo, def_stmt);
2683 /* q = t3 - t4; or q = t4 - t3; */
2684 q = vect_recog_temp_ssa_var (itype, NULL);
2685 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2686 d < 0 ? t3 : t4);
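/* E.g. for signed 32-bit division by 3 (illustrative):
   ml == 0x55555556 with post_shift == 0 and no add-back, so the
   sequence degenerates to

     t1 = n h* 0x55555556;
     t4 = n >> 31;
     q  = t1 - t4;

   where t4 is 0 or -1, i.e. the correction of +1 that truncating
   division needs for negative n.  */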
2690 if (rhs_code == TRUNC_MOD_EXPR)
2692 tree r, t1;
2694 /* We divided. Now finish by:
2695 t1 = q * oprnd1;
2696 r = oprnd0 - t1; */
2697 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2699 t1 = vect_recog_temp_ssa_var (itype, NULL);
2700 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2701 append_pattern_def_seq (stmt_vinfo, def_stmt);
2703 r = vect_recog_temp_ssa_var (itype, NULL);
2704 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2707 /* Pattern detected. */
2708 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_NOTE, vect_location,
2711 "vect_recog_divmod_pattern: detected: ");
2712 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2713 dump_printf (MSG_NOTE, "\n");
2716 stmts->safe_push (last_stmt);
2718 *type_in = vectype;
2719 *type_out = vectype;
2720 return pattern_stmt;
2723 /* Function vect_recog_mixed_size_cond_pattern
2725 Try to find the following pattern:
2727 type x_t, y_t;
2728 TYPE a_T, b_T, c_T;
2729 loop:
2730 S1 a_T = x_t CMP y_t ? b_T : c_T;
2732 where type 'TYPE' is an integral type which has different size
2733 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2734 than 'type', the constants need to fit into an integer type
2735 with the same width as 'type') or results of conversion from 'type'.
2737 Input:
2739 * LAST_STMT: A stmt from which the pattern search begins.
2741 Output:
2743 * TYPE_IN: The type of the input arguments to the pattern.
2745 * TYPE_OUT: The type of the output of this pattern.
2747 * Return value: A new stmt that will be used to replace the pattern.
2748 Additionally a def_stmt is added.
2750 a_it = x_t CMP y_t ? b_it : c_it;
2751 a_T = (TYPE) a_it; */
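/* Illustrative source-level trigger (a sketch, not from the sources):

     unsigned char x[N];  int out[N];
     ...
     out[i] = x[i] < 16 ? 2 : 7;

   The comparison is char-wide while the result is int-wide; the
   pattern first selects between char-sized 2 and 7 and only then
   widens the selected value, so a char-sized VEC_COND_EXPR can be
   used.  */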
2753 static gimple *
2754 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2755 tree *type_out)
2757 gimple *last_stmt = (*stmts)[0];
2758 tree cond_expr, then_clause, else_clause;
2759 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2760 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2761 gimple *pattern_stmt, *def_stmt;
2762 vec_info *vinfo = stmt_vinfo->vinfo;
2763 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2764 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2765 bool promotion;
2766 tree comp_scalar_type;
2768 if (!is_gimple_assign (last_stmt)
2769 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2770 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2771 return NULL;
2773 cond_expr = gimple_assign_rhs1 (last_stmt);
2774 then_clause = gimple_assign_rhs2 (last_stmt);
2775 else_clause = gimple_assign_rhs3 (last_stmt);
2777 if (!COMPARISON_CLASS_P (cond_expr))
2778 return NULL;
2780 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2781 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2782 if (comp_vectype == NULL_TREE)
2783 return NULL;
2785 type = gimple_expr_type (last_stmt);
2786 if (types_compatible_p (type, comp_scalar_type)
2787 || ((TREE_CODE (then_clause) != INTEGER_CST
2788 || TREE_CODE (else_clause) != INTEGER_CST)
2789 && !INTEGRAL_TYPE_P (comp_scalar_type))
2790 || !INTEGRAL_TYPE_P (type))
2791 return NULL;
2793 if ((TREE_CODE (then_clause) != INTEGER_CST
2794 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2795 &def_stmt0, &promotion))
2796 || (TREE_CODE (else_clause) != INTEGER_CST
2797 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2798 &def_stmt1, &promotion)))
2799 return NULL;
2801 if (orig_type0 && orig_type1
2802 && !types_compatible_p (orig_type0, orig_type1))
2803 return NULL;
2805 if (orig_type0)
2807 if (!types_compatible_p (orig_type0, comp_scalar_type))
2808 return NULL;
2809 then_clause = gimple_assign_rhs1 (def_stmt0);
2810 itype = orig_type0;
2813 if (orig_type1)
2815 if (!types_compatible_p (orig_type1, comp_scalar_type))
2816 return NULL;
2817 else_clause = gimple_assign_rhs1 (def_stmt1);
2818 itype = orig_type1;
2822 HOST_WIDE_INT cmp_mode_size
2823 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2825 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2826 return NULL;
2828 vectype = get_vectype_for_scalar_type (type);
2829 if (vectype == NULL_TREE)
2830 return NULL;
2832 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2833 return NULL;
2835 if (itype == NULL_TREE)
2836 itype = build_nonstandard_integer_type (cmp_mode_size,
2837 TYPE_UNSIGNED (type));
2839 if (itype == NULL_TREE
2840 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2841 return NULL;
2843 vecitype = get_vectype_for_scalar_type (itype);
2844 if (vecitype == NULL_TREE)
2845 return NULL;
2847 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2848 return NULL;
2850 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2852 if ((TREE_CODE (then_clause) == INTEGER_CST
2853 && !int_fits_type_p (then_clause, itype))
2854 || (TREE_CODE (else_clause) == INTEGER_CST
2855 && !int_fits_type_p (else_clause, itype)))
2856 return NULL;
2859 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2860 COND_EXPR, unshare_expr (cond_expr),
2861 fold_convert (itype, then_clause),
2862 fold_convert (itype, else_clause));
2863 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2864 NOP_EXPR, gimple_assign_lhs (def_stmt));
2866 new_pattern_def_seq (stmt_vinfo, def_stmt);
2867 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2868 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2869 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2870 *type_in = vecitype;
2871 *type_out = vectype;
2873 if (dump_enabled_p ())
2874 dump_printf_loc (MSG_NOTE, vect_location,
2875 "vect_recog_mixed_size_cond_pattern: detected:\n");
2877 return pattern_stmt;
2881 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2882 true if bool VAR can be optimized that way. */
2884 static bool
2885 check_bool_pattern (tree var, vec_info *vinfo)
2887 gimple *def_stmt;
2888 enum vect_def_type dt;
2889 tree rhs1;
2890 enum tree_code rhs_code;
2892 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2893 return false;
2895 if (dt != vect_internal_def)
2896 return false;
2898 if (!is_gimple_assign (def_stmt))
2899 return false;
2901 if (!has_single_use (var))
2902 return false;
2904 rhs1 = gimple_assign_rhs1 (def_stmt);
2905 rhs_code = gimple_assign_rhs_code (def_stmt);
2906 switch (rhs_code)
2908 case SSA_NAME:
2909 return check_bool_pattern (rhs1, vinfo);
2911 CASE_CONVERT:
2912 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2913 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2914 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2915 return false;
2916 return check_bool_pattern (rhs1, vinfo);
2918 case BIT_NOT_EXPR:
2919 return check_bool_pattern (rhs1, vinfo);
2921 case BIT_AND_EXPR:
2922 case BIT_IOR_EXPR:
2923 case BIT_XOR_EXPR:
2924 if (!check_bool_pattern (rhs1, vinfo))
2925 return false;
2926 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2928 default:
2929 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2931 tree vecitype, comp_vectype;
2933 /* If the comparison can throw, then is_gimple_condexpr will be
2934 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2935 if (stmt_could_throw_p (def_stmt))
2936 return false;
2938 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2939 if (comp_vectype == NULL_TREE)
2940 return false;
2942 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2944 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2945 tree itype
2946 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2947 vecitype = get_vectype_for_scalar_type (itype);
2948 if (vecitype == NULL_TREE)
2949 return false;
2951 else
2952 vecitype = comp_vectype;
2953 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2955 return false;
2960 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2961 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2962 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2964 static tree
2965 adjust_bool_pattern_cast (tree type, tree var)
2967 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2968 gimple *cast_stmt, *pattern_stmt;
2970 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2971 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2972 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2973 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2974 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2975 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2976 return gimple_assign_lhs (cast_stmt);
2980 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2981 recursively. VAR is an SSA_NAME that should be transformed from bool
2982 to a wider integer type, OUT_TYPE is the desired final integer type of
2983 the whole pattern, TRUEVAL should be NULL unless optimizing
2984 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2985 in the then_clause, STMTS is where statements with added pattern stmts
2986 should be pushed to. */
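/* For instance (sketch): adjusting c_b = (a < b) ^ (x < y) with
   OUT_TYPE int recursively produces

     a_T = a < b ? 1 : 0;
     b_T = x < y ? 1 : 0;
     c_T = a_T ^ b_T;

   with casts inserted through adjust_bool_pattern_cast whenever the
   two integer operands come back with different precisions.  */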
2988 static tree
2989 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2990 vec<gimple *> *stmts)
2992 gimple *stmt = SSA_NAME_DEF_STMT (var);
2993 enum tree_code rhs_code, def_rhs_code;
2994 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2995 location_t loc;
2996 gimple *pattern_stmt, *def_stmt;
2998 rhs1 = gimple_assign_rhs1 (stmt);
2999 rhs2 = gimple_assign_rhs2 (stmt);
3000 rhs_code = gimple_assign_rhs_code (stmt);
3001 loc = gimple_location (stmt);
3002 switch (rhs_code)
3004 case SSA_NAME:
3005 CASE_CONVERT:
3006 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3007 itype = TREE_TYPE (irhs1);
3008 pattern_stmt
3009 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3010 SSA_NAME, irhs1);
3011 break;
3013 case BIT_NOT_EXPR:
3014 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3015 itype = TREE_TYPE (irhs1);
3016 pattern_stmt
3017 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3018 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3019 break;
3021 case BIT_AND_EXPR:
3022 /* Try to optimize x = y & (a < b ? 1 : 0); into
3023 x = (a < b ? y : 0);
3025 E.g. for:
3026 bool a_b, b_b, c_b;
3027 TYPE d_T;
3029 S1 a_b = x1 CMP1 y1;
3030 S2 b_b = x2 CMP2 y2;
3031 S3 c_b = a_b & b_b;
3032 S4 d_T = (TYPE) c_b;
3034 we would normally emit:
3036 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3037 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3038 S3' c_T = a_T & b_T;
3039 S4' d_T = c_T;
3041 but we can save one stmt by using the
3042 result of one of the COND_EXPRs in the other COND_EXPR and leave
3043 BIT_AND_EXPR stmt out:
3045 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3046 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3047 S4' f_T = c_T;
3049 At least when VEC_COND_EXPR is implemented using masks
3050 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3051 computes the comparison mask and ANDs it, in one case with an
3052 all-ones vector, in the other case with a vector register.
3053 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3054 often more expensive. */
3055 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3056 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3057 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3059 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3060 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3061 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3062 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3064 gimple *tstmt;
3065 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3066 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3067 tstmt = stmts->pop ();
3068 gcc_assert (tstmt == def_stmt);
3069 stmts->quick_push (stmt);
3070 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3071 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3072 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3073 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3074 return irhs2;
3076 else
3077 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3078 goto and_ior_xor;
3080 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3081 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3082 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3084 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3085 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3086 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3087 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3089 gimple *tstmt;
3090 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3091 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3092 tstmt = stmts->pop ();
3093 gcc_assert (tstmt == def_stmt);
3094 stmts->quick_push (stmt);
3095 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3096 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3097 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3098 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3099 return irhs1;
3101 else
3102 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3103 goto and_ior_xor;
3105 /* FALLTHRU */
3106 case BIT_IOR_EXPR:
3107 case BIT_XOR_EXPR:
3108 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3109 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3110 and_ior_xor:
3111 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3112 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3114 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3115 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3116 int out_prec = TYPE_PRECISION (out_type);
3117 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3118 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3119 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3120 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3121 else
3123 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3124 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3127 itype = TREE_TYPE (irhs1);
3128 pattern_stmt
3129 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3130 rhs_code, irhs1, irhs2);
3131 break;
3133 default:
3134 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3135 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3136 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3137 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3138 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3140 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3141 itype
3142 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3144 else
3145 itype = TREE_TYPE (rhs1);
3146 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3147 if (trueval == NULL_TREE)
3148 trueval = build_int_cst (itype, 1);
3149 else
3150 gcc_checking_assert (useless_type_conversion_p (itype,
3151 TREE_TYPE (trueval)));
3152 pattern_stmt
3153 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3154 COND_EXPR, cond_expr, trueval,
3155 build_int_cst (itype, 0));
3156 break;
3159 stmts->safe_push (stmt);
3160 gimple_set_location (pattern_stmt, loc);
3161 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3162 return gimple_assign_lhs (pattern_stmt);
3166 /* Function vect_recog_bool_pattern
3168 Try to find pattern like following:
3170 bool a_b, b_b, c_b, d_b, e_b;
3171 TYPE f_T;
3172 loop:
3173 S1 a_b = x1 CMP1 y1;
3174 S2 b_b = x2 CMP2 y2;
3175 S3 c_b = a_b & b_b;
3176 S4 d_b = x3 CMP3 y3;
3177 S5 e_b = c_b | d_b;
3178 S6 f_T = (TYPE) e_b;
3180 where type 'TYPE' is an integral type. Or a similar pattern
3181 ending in
3183 S6 f_Y = e_b ? r_Y : s_Y;
3185 as results from if-conversion of a complex condition.
3187 Input:
3189 * LAST_STMT: A stmt at the end from which the pattern
3190 search begins, i.e. cast of a bool to
3191 an integer type.
3193 Output:
3195 * TYPE_IN: The type of the input arguments to the pattern.
3197 * TYPE_OUT: The type of the output of this pattern.
3199 * Return value: A new stmt that will be used to replace the pattern.
3201 Assuming size of TYPE is the same as size of all comparisons
3202 (otherwise some casts would be added where needed), for the above
3203 sequence we create related pattern stmts:
3204 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3205 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3206 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3207 S5' e_T = c_T | d_T;
3208 S6' f_T = e_T;
3210 Instead of the above S3' we could emit:
3211 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3212 S3' c_T = a_T | b_T;
3213 but the above is more efficient. */
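/* A typical trigger (illustrative) is if-converted code such as

     out[i] = (p[i] < 4) & (q[i] > 5) ? 17 : 13;

   where the single-bit booleans have no direct vector representation,
   so they are first rewritten into integer COND_EXPR arithmetic as
   described above.  */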
3215 static gimple *
3216 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3217 tree *type_out)
3219 gimple *last_stmt = stmts->pop ();
3220 enum tree_code rhs_code;
3221 tree var, lhs, rhs, vectype;
3222 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3223 vec_info *vinfo = stmt_vinfo->vinfo;
3224 gimple *pattern_stmt;
3226 if (!is_gimple_assign (last_stmt))
3227 return NULL;
3229 var = gimple_assign_rhs1 (last_stmt);
3230 lhs = gimple_assign_lhs (last_stmt);
3232 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3233 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3234 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3235 return NULL;
3237 rhs_code = gimple_assign_rhs_code (last_stmt);
3238 if (CONVERT_EXPR_CODE_P (rhs_code))
3240 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3241 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3242 return NULL;
3243 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3244 if (vectype == NULL_TREE)
3245 return NULL;
3247 if (!check_bool_pattern (var, vinfo))
3248 return NULL;
3250 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3251 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3252 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3253 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3254 else
3255 pattern_stmt
3256 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3257 *type_out = vectype;
3258 *type_in = vectype;
3259 stmts->safe_push (last_stmt);
3260 if (dump_enabled_p ())
3261 dump_printf_loc (MSG_NOTE, vect_location,
3262 "vect_recog_bool_pattern: detected:\n");
3264 return pattern_stmt;
3266 else if (rhs_code == COND_EXPR
3267 && TREE_CODE (var) == SSA_NAME)
3269 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3270 if (vectype == NULL_TREE)
3271 return NULL;
3273 /* Build a scalar type for the boolean result that when
3274 vectorized matches the vector type of the result in
3275 size and number of elements. */
3276 unsigned prec
3277 = wi::udiv_trunc (TYPE_SIZE (vectype),
3278 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3279 tree type
3280 = build_nonstandard_integer_type (prec,
3281 TYPE_UNSIGNED (TREE_TYPE (var)));
3282 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3283 return NULL;
3285 if (!check_bool_pattern (var, vinfo))
3286 return NULL;
3288 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3289 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3290 pattern_stmt
3291 = gimple_build_assign (lhs, COND_EXPR,
3292 build2 (NE_EXPR, boolean_type_node,
3293 rhs, build_int_cst (type, 0)),
3294 gimple_assign_rhs2 (last_stmt),
3295 gimple_assign_rhs3 (last_stmt));
3296 *type_out = vectype;
3297 *type_in = vectype;
3298 stmts->safe_push (last_stmt);
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_NOTE, vect_location,
3301 "vect_recog_bool_pattern: detected:\n");
3303 return pattern_stmt;
3305 else if (rhs_code == SSA_NAME
3306 && STMT_VINFO_DATA_REF (stmt_vinfo))
3308 stmt_vec_info pattern_stmt_info;
3309 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3310 gcc_assert (vectype != NULL_TREE);
3311 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3312 return NULL;
3313 if (!check_bool_pattern (var, vinfo))
3314 return NULL;
3316 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3317 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3318 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3320 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3321 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3322 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3323 rhs = rhs2;
3325 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3326 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3327 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3328 STMT_VINFO_DATA_REF (pattern_stmt_info)
3329 = STMT_VINFO_DATA_REF (stmt_vinfo);
3330 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3331 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3332 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3333 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3334 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3335 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3336 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3337 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3338 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3339 *type_out = vectype;
3340 *type_in = vectype;
3341 stmts->safe_push (last_stmt);
3342 if (dump_enabled_p ())
3343 dump_printf_loc (MSG_NOTE, vect_location,
3344 "vect_recog_bool_pattern: detected:\n");
3345 return pattern_stmt;
3347 else
3348 return NULL;
3352 /* Mark statements that are involved in a pattern. */
3354 static inline void
3355 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
3356 tree pattern_vectype)
3358 stmt_vec_info pattern_stmt_info, def_stmt_info;
3359 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3360 vec_info *vinfo = orig_stmt_info->vinfo;
3361 gimple *def_stmt;
3363 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3364 if (pattern_stmt_info == NULL)
3366 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3367 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3369 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3371 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3372 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3373 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3374 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3375 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3376 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3377 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3378 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3379 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3381 gimple_stmt_iterator si;
3382 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3383 !gsi_end_p (si); gsi_next (&si))
3385 def_stmt = gsi_stmt (si);
3386 def_stmt_info = vinfo_for_stmt (def_stmt);
3387 if (def_stmt_info == NULL)
3389 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3390 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3392 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3393 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3394 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3395 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3396 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3401 /* Function vect_pattern_recog_1
3403 Input:
3404 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3405 computation pattern.
3406 STMT: A stmt from which the pattern search should start.
3408 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3409 expression that computes the same functionality and can be used to
3410 replace the sequence of stmts that are involved in the pattern.
3412 Output:
3413 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3414 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3415 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3416 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3417 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3418 the available target pattern.
3420 This function also does some bookkeeping, as explained in the documentation
3421 for vect_pattern_recog. */
3423 static void
3424 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3425 gimple_stmt_iterator si,
3426 vec<gimple *> *stmts_to_replace)
3428 gimple *stmt = gsi_stmt (si), *pattern_stmt;
3429 stmt_vec_info stmt_info;
3430 loop_vec_info loop_vinfo;
3431 tree pattern_vectype;
3432 tree type_in, type_out;
3433 enum tree_code code;
3434 int i;
3435 gimple *next;
3437 stmts_to_replace->truncate (0);
3438 stmts_to_replace->quick_push (stmt);
3439 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3440 if (!pattern_stmt)
3441 return;
3443 stmt = stmts_to_replace->last ();
3444 stmt_info = vinfo_for_stmt (stmt);
3445 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3447 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3449 /* No need to check target support (already checked by the pattern
3450 recognition function). */
3451 pattern_vectype = type_out ? type_out : type_in;
3453 else
3455 machine_mode vec_mode;
3456 enum insn_code icode;
3457 optab optab;
3459 /* Check target support */
3460 type_in = get_vectype_for_scalar_type (type_in);
3461 if (!type_in)
3462 return;
3463 if (type_out)
3464 type_out = get_vectype_for_scalar_type (type_out);
3465 else
3466 type_out = type_in;
3467 if (!type_out)
3468 return;
3469 pattern_vectype = type_out;
3471 if (is_gimple_assign (pattern_stmt))
3472 code = gimple_assign_rhs_code (pattern_stmt);
3473 else
3475 gcc_assert (is_gimple_call (pattern_stmt));
3476 code = CALL_EXPR;
3479 optab = optab_for_tree_code (code, type_in, optab_default);
3480 vec_mode = TYPE_MODE (type_in);
3481 if (!optab
3482 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3483 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3484 return;
3487 /* Found a vectorizable pattern. */
3488 if (dump_enabled_p ())
3490 dump_printf_loc (MSG_NOTE, vect_location,
3491 "pattern recognized: ");
3492 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3495 /* Mark the stmts that are involved in the pattern. */
3496 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3498 /* Patterns cannot be vectorized using SLP, because they change the order of
3499 computation. */
3500 if (loop_vinfo)
3501 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3502 if (next == stmt)
3503 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3505 /* It is possible that additional pattern stmts are created and inserted in
3506 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3507 relevant statements. */
3508 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3509 && (unsigned) i < (stmts_to_replace->length () - 1);
3510 i++)
3512 stmt_info = vinfo_for_stmt (stmt);
3513 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3514 if (dump_enabled_p ())
3516 dump_printf_loc (MSG_NOTE, vect_location,
3517 "additional pattern stmt: ");
3518 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3521 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3526 /* Function vect_pattern_recog
3528 Input:
3529 VINFO - a loop or basic-block vectorization info structure in which we
3530 want to look for computation idioms.
3532 Output - for each computation idiom that is detected we create a new stmt
3533 that provides the same functionality and that can be vectorized. We
3534 also record some information in the struct_stmt_info of the relevant
3535 stmts, as explained below:
3537 At the entry to this function we have the following stmts, with the
3538 following initial value in the STMT_VINFO fields:
3540 stmt in_pattern_p related_stmt vec_stmt
3541 S1: a_i = .... - - -
3542 S2: a_2 = ..use(a_i).. - - -
3543 S3: a_1 = ..use(a_2).. - - -
3544 S4: a_0 = ..use(a_1).. - - -
3545 S5: ... = ..use(a_0).. - - -
3547 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3548 represented by a single stmt. We then:
3549 - create a new stmt S6 equivalent to the pattern (the stmt is not
3550 inserted into the code)
3551 - fill in the STMT_VINFO fields as follows:
3553 in_pattern_p related_stmt vec_stmt
3554 S1: a_i = .... - - -
3555 S2: a_2 = ..use(a_i).. - - -
3556 S3: a_1 = ..use(a_2).. - - -
3557 S4: a_0 = ..use(a_1).. true S6 -
3558 '---> S6: a_new = .... - S4 -
3559 S5: ... = ..use(a_0).. - - -
3561 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3562 to each other through the RELATED_STMT field).
3564 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3565 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3566 remain irrelevant unless used by stmts other than S4.
3568 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3569 (because they are marked as irrelevant). It will vectorize S6, and record
3570 a pointer to the new vector stmt VS6 from S6 (as usual).
3571 S4 will be skipped, and S5 will be vectorized as usual:
3573 in_pattern_p related_stmt vec_stmt
3574 S1: a_i = .... - - -
3575 S2: a_2 = ..use(a_i).. - - -
3576 S3: a_1 = ..use(a_2).. - - -
3577 > VS6: va_new = .... - - -
3578 S4: a_0 = ..use(a_1).. true S6 VS6
3579 '---> S6: a_new = .... - S4 VS6
3580 > VS5: ... = ..vuse(va_new).. - - -
3581 S5: ... = ..use(a_0).. - - -
3583 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3584 elsewhere), and we'll end up with:
3586 VS6: va_new = ....
3587 VS5: ... = ..vuse(va_new)..
3589 In case of more than one pattern statements, e.g., widen-mult with
3590 intermediate type:
3592 S1 a_t = ;
3593 S2 a_T = (TYPE) a_t;
3594 '--> S3: a_it = (interm_type) a_t;
3595 S4 prod_T = a_T * CONST;
3596 '--> S5: prod_T' = a_it w* CONST;
3598 there may be other users of a_T outside the pattern. In that case S2 will
3599 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3600 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3601 be recorded in S3. */
3603 void
3604 vect_pattern_recog (vec_info *vinfo)
3606 struct loop *loop;
3607 basic_block *bbs;
3608 unsigned int nbbs;
3609 gimple_stmt_iterator si;
3610 unsigned int i, j;
3611 vect_recog_func_ptr vect_recog_func;
3612 auto_vec<gimple *, 1> stmts_to_replace;
3613 gimple *stmt;
3615 if (dump_enabled_p ())
3616 dump_printf_loc (MSG_NOTE, vect_location,
3617 "=== vect_pattern_recog ===\n");
3619 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
3621 loop = LOOP_VINFO_LOOP (loop_vinfo);
3622 bbs = LOOP_VINFO_BBS (loop_vinfo);
3623 nbbs = loop->num_nodes;
3625 else
3627 bbs = &as_a <bb_vec_info> (vinfo)->bb;
3628 nbbs = 1;
3631 /* Scan through the loop stmts, applying the pattern recognition
3632 functions starting at each stmt visited: */
3633 for (i = 0; i < nbbs; i++)
3635 basic_block bb = bbs[i];
3636 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3638 if (is_a <bb_vec_info> (vinfo)
3639 && (stmt = gsi_stmt (si))
3640 && vinfo_for_stmt (stmt)
3641 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3642 continue;
3644 /* Scan over all generic vect_recog_xxx_pattern functions. */
3645 for (j = 0; j < NUM_PATTERNS; j++)
3647 vect_recog_func = vect_vect_recog_func_ptrs[j];
3648 vect_pattern_recog_1 (vect_recog_func, si,
3649 &stmts_to_replace);