/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
                                                 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};
/* Note that ordering matters - the first pattern matching on a stmt
   is taken, which usually means the more complex pattern needs to precede
   the less complex ones (widen_sum only after dot_prod or sad, for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_over_widening_pattern, "over_widening" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
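
/* For instance (an illustrative note added here, not from the original
   sources): for the gimple generated from "int i = s;" with a 16-bit
   'short s' and a 32-bit 'int', NAME is the SSA name of i, DEF_STMT is the
   NOP_EXPR conversion, *ORIG_TYPE is short, and *PROMOTION is set to true
   because int is at least twice as wide as short.  */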
static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
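
/* Illustrative only (an added example, not part of the original sources):
   assuming 16-bit 'short' and 32-bit 'int', the C loop

       int
       dot_prod (short *x, short *y, int n)
       {
         int sum = 0;
         for (int i = 0; i < n; i++)
           sum += x[i] * y[i];
         return sum;
       }

   exhibits this idiom: the usual arithmetic conversions introduce S3/S4,
   the multiplication is S5 and the accumulation is S7.  */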
static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
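
/* Illustrative only (an added example, not part of the original sources):
   assuming 8-bit 'unsigned char' and 32-bit 'int', the C loop

       #include <stdlib.h>

       int
       sad (unsigned char *x, unsigned char *y, int n)
       {
         int sum = 0;
         for (int i = 0; i < n; i++)
           sum += abs (x[i] - y[i]);
         return sum;
       }

   matches this shape: integer promotion of x[i] and y[i] supplies S3/S4,
   the subtraction is S5, the abs is S6 and the accumulation is S8.  */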
static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
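
/* For illustration (an added sketch, not part of the original sources):
   with an 8-bit 'signed char' operand and a 32-bit 'int' result, the
   statement

       r[i] = a[i] * 300;

   has TYPE (int) four times wider than HALF_TYPE (char), and 300 does not
   fit char but does fit the 16-bit intermediate type, so the promotion is
   rewritten as a_it = (short) a_t and the multiplication is widened from
   short to int.  */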
static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */
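
/* Illustrative only (an added example, not part of the original sources):
   assuming 16-bit 'short' and 32-bit 'int', the C loop

       void
       widen_mult (int *restrict r, short *restrict a, short *restrict b,
		   int n)
       {
         for (int i = 0; i < n; i++)
           r[i] = a[i] * b[i];
       }

   produces S3/S4 via integer promotion and S5 as the int multiplication,
   which this function replaces with WIDEN_MULT <a_t, b_t>.  */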
static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
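
/* Illustrative only (an added example, not part of the original sources):

       #include <math.h>

       double square (double x) { return pow (x, 2.0); }
       double root (double x)   { return pow (x, 0.5); }

   The first call is rewritten as x * x; the second becomes a sqrt,
   provided the target supports IFN_SQRT on the chosen vector type.  */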
static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  return stmt;
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
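
/* Illustrative only (an added example, not part of the original sources):
   assuming 16-bit 'short' and 32-bit 'int', the reduction

       int
       widen_sum (short *x, int n)
       {
         int sum = 0;
         for (int i = 0; i < n; i++)
           sum += x[i];
         return sum;
       }

   contains S2 (the promotion introduced by the usual arithmetic
   conversions) and S3 (the accumulation).  */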
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
      && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
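
/* Illustrative only (an added example, not part of the original sources):
   with 8-bit 'unsigned char' and 32-bit 'int', the loop

       void
       narrow_ops (unsigned char *r, unsigned char *a, int n)
       {
         for (int i = 0; i < n; i++)
           r[i] = (a[i] >> 2) & 0x1f;
       }

   promotes a[i] to int (S2), performs the shift and the mask on int
   (S3/S4) and demotes the result back to unsigned char for the store
   (S5); the whole computation could equally be done on unsigned char.  */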
static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 a case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced
    with S4 in STMTS.  When an intermediate type is used and a pattern
    statement is created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
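
/* Illustrative only (an added example, not part of the original sources):
   assuming 16-bit 'short' and 32-bit 'int', the loop

       void
       widen_shift (int *restrict r, short *restrict a, int n)
       {
         for (int i = 0; i < n; i++)
           r[i] = a[i] << 10;
       }

   yields S2 (promotion of a[i] to int) and S3 (the shift by a constant),
   which become WIDEN_LSHIFT_EXPR <a_t, 10>.  */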
static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is the element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
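
/* Illustrative only (an added example, not part of the original sources):
   a rotate written in the usual portable way,

       unsigned int
       rotl (unsigned int x, unsigned int c)
       {
         return (x << c) | (x >> ((-c) & 31));
       }

   is folded to "x r<< c" earlier in compilation (assuming 32-bit unsigned
   int and that the middle end recognizes this shift-or idiom, as GCC
   does); if the target has no vector rotate but does have vector shifts,
   this function expands the rotate back into the S1-S4/S0 sequence
   above.  */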
1783 static gimple *
1784 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1786 gimple *last_stmt = stmts->pop ();
1787 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1788 gimple *pattern_stmt, *def_stmt;
1789 enum tree_code rhs_code;
1790 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1791 vec_info *vinfo = stmt_vinfo->vinfo;
1792 enum vect_def_type dt;
1793 optab optab1, optab2;
1794 edge ext_def = NULL;
1796 if (!is_gimple_assign (last_stmt))
1797 return NULL;
1799 rhs_code = gimple_assign_rhs_code (last_stmt);
1800 switch (rhs_code)
1802 case LROTATE_EXPR:
1803 case RROTATE_EXPR:
1804 break;
1805 default:
1806 return NULL;
1809 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1810 return NULL;
1812 lhs = gimple_assign_lhs (last_stmt);
1813 oprnd0 = gimple_assign_rhs1 (last_stmt);
1814 type = TREE_TYPE (oprnd0);
1815 oprnd1 = gimple_assign_rhs2 (last_stmt);
1816 if (TREE_CODE (oprnd0) != SSA_NAME
1817 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1818 || !INTEGRAL_TYPE_P (type)
1819 || !TYPE_UNSIGNED (type))
1820 return NULL;
1822 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1823 return NULL;
1825 if (dt != vect_internal_def
1826 && dt != vect_constant_def
1827 && dt != vect_external_def)
1828 return NULL;
1830 vectype = get_vectype_for_scalar_type (type);
1831 if (vectype == NULL_TREE)
1832 return NULL;
1834 /* If vector/vector or vector/scalar rotate is supported by the target,
1835 don't do anything here. */
1836 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1837 if (optab1
1838 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1839 return NULL;
1841 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1843 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1844 if (optab2
1845 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1846 return NULL;
1849 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1850 don't do anything here either. */
1851 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1852 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1853 if (!optab1
1854 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1855 || !optab2
1856 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1858 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1859 return NULL;
1860 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1861 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1862 if (!optab1
1863 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1864 || !optab2
1865 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1866 return NULL;
1869 *type_in = vectype;
1870 *type_out = vectype;
1871 if (*type_in == NULL_TREE)
1872 return NULL;
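  /* If the shift amount is defined outside the loop, remember the
     preheader edge so the helper statements built below can be inserted
     there once, rather than in the loop body.  */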
1874 if (dt == vect_external_def
1875 && TREE_CODE (oprnd1) == SSA_NAME
1876 && is_a <loop_vec_info> (vinfo))
1878 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1879 ext_def = loop_preheader_edge (loop);
1880 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1882 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1883 if (bb == NULL
1884 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1885 ext_def = NULL;
1889 def = NULL_TREE;
1890 if (TREE_CODE (oprnd1) == INTEGER_CST
1891 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1892 def = oprnd1;
1893 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1895 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1896 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1897 && TYPE_PRECISION (TREE_TYPE (rhs1))
1898 == TYPE_PRECISION (type))
1899 def = rhs1;
1902 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1903 if (def == NULL_TREE)
1905 def = vect_recog_temp_ssa_var (type, NULL);
1906 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1907 if (ext_def)
1909 basic_block new_bb
1910 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1911 gcc_assert (!new_bb);
1913 else
1914 append_pattern_def_seq (stmt_vinfo, def_stmt);
1916 stype = TREE_TYPE (def);
1918 if (TREE_CODE (def) == INTEGER_CST)
1920 if (!tree_fits_uhwi_p (def)
1921 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1922 || integer_zerop (def))
1923 return NULL;
1924 def2 = build_int_cst (stype,
1925 GET_MODE_PRECISION (TYPE_MODE (type))
1926 - tree_to_uhwi (def));
1928 else
1930 tree vecstype = get_vectype_for_scalar_type (stype);
1931 stmt_vec_info def_stmt_vinfo;
1933 if (vecstype == NULL_TREE)
1934 return NULL;
1935 def2 = vect_recog_temp_ssa_var (stype, NULL);
1936 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1937 if (ext_def)
1939 basic_block new_bb
1940 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1941 gcc_assert (!new_bb);
1943 else
1945 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1946 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1947 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1948 append_pattern_def_seq (stmt_vinfo, def_stmt);
1951 def2 = vect_recog_temp_ssa_var (stype, NULL);
1952 tree mask
1953 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1954 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1955 gimple_assign_lhs (def_stmt), mask);
1956 if (ext_def)
1958 basic_block new_bb
1959 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1960 gcc_assert (!new_bb);
1962 else
1964 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1965 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1966 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1967 append_pattern_def_seq (stmt_vinfo, def_stmt);
1971 var1 = vect_recog_temp_ssa_var (type, NULL);
1972 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1973 ? LSHIFT_EXPR : RSHIFT_EXPR,
1974 oprnd0, def);
1975 append_pattern_def_seq (stmt_vinfo, def_stmt);
1977 var2 = vect_recog_temp_ssa_var (type, NULL);
1978 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1979 ? RSHIFT_EXPR : LSHIFT_EXPR,
1980 oprnd0, def2);
1981 append_pattern_def_seq (stmt_vinfo, def_stmt);
1983 /* Pattern detected. */
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
1986 "vect_recog_rotate_pattern: detected:\n");
1988 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1989 var = vect_recog_temp_ssa_var (type, NULL);
1990 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1992 if (dump_enabled_p ())
1993 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1995 stmts->safe_push (last_stmt);
1996 return pattern_stmt;
1999 /* Detect a vector by vector shift pattern that wouldn't otherwise
2000 be vectorized:
2002 type a_t;
2003 TYPE b_T, res_T;
2005 S1 a_t = ;
2006 S2 b_T = ;
2007 S3 res_T = b_T op a_t;
2009 where type 'TYPE' is a type with a different size than 'type',
2010 and op is <<, >> or rotate.
2012 Also detect cases:
2014 type a_t;
2015 TYPE b_T, c_T, res_T;
2017 S0 c_T = ;
2018 S1 a_t = (type) c_T;
2019 S2 b_T = ;
2020 S3 res_T = b_T op a_t;
2022 Input/Output:
2024 * STMTS: Contains a stmt from which the pattern search begins,
2025 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2026 with a shift/rotate which has same type on both operands, in the
2027 second case just b_T op c_T, in the first case with added cast
2028 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2030 Output:
2032 * TYPE_IN: The type of the input arguments to the pattern.
2034 * TYPE_OUT: The type of the output of this pattern.
2036 * Return value: A new stmt that will be used to replace the shift/rotate
2037 S3 stmt. */
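/* An illustrative C sketch (exposition only, assuming TYPE == int and
   type == char): the rewrite gives the shift matching operand types by
   casting the narrow shift amount back up in the pattern def seq.  */
#if 0
static int
vector_vector_shift_example (int b_T, char a_t)
{
  int a_T = (int) a_t;	/* cast added to STMT_VINFO_PATTERN_DEF_SEQ */
  return b_T << a_T;	/* S3 with both operands of the same type */
}
#endif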
2039 static gimple *
2040 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2041 tree *type_in, tree *type_out)
2043 gimple *last_stmt = stmts->pop ();
2044 tree oprnd0, oprnd1, lhs, var;
2045 gimple *pattern_stmt, *def_stmt;
2046 enum tree_code rhs_code;
2047 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2048 vec_info *vinfo = stmt_vinfo->vinfo;
2049 enum vect_def_type dt;
2051 if (!is_gimple_assign (last_stmt))
2052 return NULL;
2054 rhs_code = gimple_assign_rhs_code (last_stmt);
2055 switch (rhs_code)
2057 case LSHIFT_EXPR:
2058 case RSHIFT_EXPR:
2059 case LROTATE_EXPR:
2060 case RROTATE_EXPR:
2061 break;
2062 default:
2063 return NULL;
2066 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2067 return NULL;
2069 lhs = gimple_assign_lhs (last_stmt);
2070 oprnd0 = gimple_assign_rhs1 (last_stmt);
2071 oprnd1 = gimple_assign_rhs2 (last_stmt);
2072 if (TREE_CODE (oprnd0) != SSA_NAME
2073 || TREE_CODE (oprnd1) != SSA_NAME
2074 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2075 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2076 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2077 || TYPE_PRECISION (TREE_TYPE (lhs))
2078 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2079 return NULL;
2081 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2082 return NULL;
2084 if (dt != vect_internal_def)
2085 return NULL;
2087 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2088 *type_out = *type_in;
2089 if (*type_in == NULL_TREE)
2090 return NULL;
2092 tree def = NULL_TREE;
2093 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2094 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2096 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2097 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2098 && TYPE_PRECISION (TREE_TYPE (rhs1))
2099 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2101 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2102 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2103 def = rhs1;
2104 else
2106 tree mask
2107 = build_low_bits_mask (TREE_TYPE (rhs1),
2108 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2109 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2110 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2111 new_pattern_def_seq (stmt_vinfo, def_stmt);
2116 if (def == NULL_TREE)
2118 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2119 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2120 new_pattern_def_seq (stmt_vinfo, def_stmt);
2123 /* Pattern detected. */
2124 if (dump_enabled_p ())
2125 dump_printf_loc (MSG_NOTE, vect_location,
2126 "vect_recog_vector_vector_shift_pattern: detected:\n");
2128 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2129 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2130 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2132 if (dump_enabled_p ())
2133 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2135 stmts->safe_push (last_stmt);
2136 return pattern_stmt;
2139 /* Detect multiplication by a constant that is a positive or negative
2140 power of 2, and convert it to a shift pattern.
2142 Mult with constants that are positive powers of two.
2143 type a_t;
2144 type b_t;
2145 S1: b_t = a_t * n
2149 Mult with constants that are negative powers of two.
2150 S2: b_t = a_t * -n
2152 Input/Output:
2154 STMTS: Contains a stmt from which the pattern search begins,
2155 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2156 the constant operand is a power of 2.
2157 type a_t, b_t
2158 S1': b_t = a_t << log2 (n)
2160 Convert the mult operation to LSHIFT followed by a NEGATE
2161 if the constant operand is a negative power of 2.
2162 type a_t, b_t, res_T;
2163 S2': b_t = a_t << log2 (n)
2164 S3': res_T = - (b_t)
2166 Output:
2168 * TYPE_IN: The type of the input arguments to the pattern.
2170 * TYPE_OUT: The type of the output of this pattern.
2172 * Return value: A new stmt that will be used to replace the multiplication
2173 S1 or S2 stmt. */
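/* An illustrative C sketch (exposition only) of both rewrites above
   for n == 8, i.e. log2 (n) == 3.  */
#if 0
static int
mult_by_pow2_example (int a_t)
{
  int b_t = a_t << 3;	/* S1': a_t * 8 */
  int neg_t = a_t << 3;	/* S2': first half of a_t * -8 */
  int res_T = -neg_t;	/* S3' */
  return b_t + res_T;
}
#endif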
2175 static gimple *
2176 vect_recog_mult_pattern (vec<gimple *> *stmts,
2177 tree *type_in, tree *type_out)
2179 gimple *last_stmt = stmts->pop ();
2180 tree oprnd0, oprnd1, vectype, itype;
2181 gimple *pattern_stmt, *def_stmt;
2182 optab optab;
2183 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2184 int power2_val, power2_neg_val;
2185 tree shift;
2187 if (!is_gimple_assign (last_stmt))
2188 return NULL;
2190 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2191 return NULL;
2193 oprnd0 = gimple_assign_rhs1 (last_stmt);
2194 oprnd1 = gimple_assign_rhs2 (last_stmt);
2195 itype = TREE_TYPE (oprnd0);
2197 if (TREE_CODE (oprnd0) != SSA_NAME
2198 || TREE_CODE (oprnd1) != INTEGER_CST
2199 || !INTEGRAL_TYPE_P (itype)
2200 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2201 return NULL;
2203 vectype = get_vectype_for_scalar_type (itype);
2204 if (vectype == NULL_TREE)
2205 return NULL;
2207 /* If the target can handle vectorized multiplication natively,
2208 don't attempt to optimize this. */
2209 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2210 if (optab != unknown_optab)
2212 machine_mode vec_mode = TYPE_MODE (vectype);
2213 int icode = (int) optab_handler (optab, vec_mode);
2214 if (icode != CODE_FOR_nothing)
2215 return NULL;
2218 /* If the target cannot handle vector left shifts then we cannot
2219 optimize, so bail out. */
2220 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2221 if (!optab
2222 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2223 return NULL;
2225 power2_val = wi::exact_log2 (oprnd1);
2226 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2228 /* Handle constant operands that are positive or negative powers of 2. */
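  /* Note: wi::exact_log2 returns -1 unless its argument is exactly a
     power of two, so e.g. oprnd1 == 8 yields power2_val == 3 while
     oprnd1 == -8 yields power2_neg_val == 3.  */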
2229 if (power2_val != -1)
2231 shift = build_int_cst (itype, power2_val);
2232 pattern_stmt
2233 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2234 LSHIFT_EXPR, oprnd0, shift);
2236 else if (power2_neg_val != -1)
2238 /* If the target cannot handle vector NEGATE then we cannot
2239 do the optimization. */
2240 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2241 if (!optab
2242 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2243 return NULL;
2245 shift = build_int_cst (itype, power2_neg_val);
2246 def_stmt
2247 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2248 LSHIFT_EXPR, oprnd0, shift);
2249 new_pattern_def_seq (stmt_vinfo, def_stmt);
2250 pattern_stmt
2251 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2252 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2254 else
2255 return NULL;
2257 /* Pattern detected. */
2258 if (dump_enabled_p ())
2259 dump_printf_loc (MSG_NOTE, vect_location,
2260 "vect_recog_mult_pattern: detected:\n");
2262 if (dump_enabled_p ())
2263 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2264 pattern_stmt, 0);
2266 stmts->safe_push (last_stmt);
2267 *type_in = vectype;
2268 *type_out = vectype;
2270 return pattern_stmt;
2273 /* Detect a signed division by a constant that wouldn't otherwise
2274 be vectorized:
2276 type a_t, b_t;
2278 S1 a_t = b_t / N;
2280 where type 'type' is an integral type and N is a constant.
2282 Similarly handle modulo by a constant:
2284 S4 a_t = b_t % N;
2286 Input/Output:
2288 * STMTS: Contains a stmt from which the pattern search begins,
2289 i.e. the division stmt. If N is a power of two constant and
2290 the type is signed, S1 is replaced by:
2291 S3 y_t = b_t < 0 ? N - 1 : 0;
2292 S2 x_t = b_t + y_t;
2293 S1' a_t = x_t >> log2 (N);
2295 If N is a power of two constant and the type is signed,
2296 S4 is replaced by (where the *_T temporaries have unsigned type):
2297 S9 y_T = b_t < 0 ? -1U : 0U;
2298 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2299 S7 z_t = (type) z_T;
2300 S6 w_t = b_t + z_t;
2301 S5 x_t = w_t & (N - 1);
2302 S4' a_t = x_t - z_t;
2304 Output:
2306 * TYPE_IN: The type of the input arguments to the pattern.
2308 * TYPE_OUT: The type of the output of this pattern.
2310 * Return value: A new stmt that will be used to replace the division
2311 S1 or modulo S4 stmt. */
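/* An illustrative C sketch (exposition only) of both expansions above
   for a 32-bit signed type and N == 4.  The right shift of a negative
   value is implementation-defined in ISO C; the arithmetic shift it
   produces on common targets is what RSHIFT_EXPR computes here.  */
#if 0
static int
sdiv_pow2_example (int b_t)
{
  int y_t = b_t < 0 ? 3 : 0;	/* S3: N - 1 */
  int x_t = b_t + y_t;		/* S2 */
  return x_t >> 2;		/* S1': log2 (N) == 2 */
}

static int
smod_pow2_example (int b_t)
{
  unsigned int y_T = b_t < 0 ? -1U : 0U;	/* S9 */
  unsigned int z_T = y_T >> (32 - 2);		/* S8 */
  int z_t = (int) z_T;				/* S7 */
  int w_t = b_t + z_t;				/* S6 */
  int x_t = w_t & 3;				/* S5: N - 1 */
  return x_t - z_t;				/* S4' */
}
#endif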
2313 static gimple *
2314 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2315 tree *type_in, tree *type_out)
2317 gimple *last_stmt = stmts->pop ();
2318 tree oprnd0, oprnd1, vectype, itype, cond;
2319 gimple *pattern_stmt, *def_stmt;
2320 enum tree_code rhs_code;
2321 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2322 vec_info *vinfo = stmt_vinfo->vinfo;
2323 optab optab;
2324 tree q;
2325 int dummy_int, prec;
2326 stmt_vec_info def_stmt_vinfo;
2328 if (!is_gimple_assign (last_stmt))
2329 return NULL;
2331 rhs_code = gimple_assign_rhs_code (last_stmt);
2332 switch (rhs_code)
2334 case TRUNC_DIV_EXPR:
2335 case TRUNC_MOD_EXPR:
2336 break;
2337 default:
2338 return NULL;
2341 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2342 return NULL;
2344 oprnd0 = gimple_assign_rhs1 (last_stmt);
2345 oprnd1 = gimple_assign_rhs2 (last_stmt);
2346 itype = TREE_TYPE (oprnd0);
2347 if (TREE_CODE (oprnd0) != SSA_NAME
2348 || TREE_CODE (oprnd1) != INTEGER_CST
2349 || TREE_CODE (itype) != INTEGER_TYPE
2350 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2351 return NULL;
2353 vectype = get_vectype_for_scalar_type (itype);
2354 if (vectype == NULL_TREE)
2355 return NULL;
2357 /* If the target can handle vectorized division or modulo natively,
2358 don't attempt to optimize this. */
2359 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2360 if (optab != unknown_optab)
2362 machine_mode vec_mode = TYPE_MODE (vectype);
2363 int icode = (int) optab_handler (optab, vec_mode);
2364 if (icode != CODE_FOR_nothing)
2365 return NULL;
2368 prec = TYPE_PRECISION (itype);
2369 if (integer_pow2p (oprnd1))
2371 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2372 return NULL;
2374 /* Pattern detected. */
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_NOTE, vect_location,
2377 "vect_recog_divmod_pattern: detected:\n");
2379 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2380 build_int_cst (itype, 0));
2381 if (rhs_code == TRUNC_DIV_EXPR)
2383 tree var = vect_recog_temp_ssa_var (itype, NULL);
2384 tree shift;
2385 def_stmt
2386 = gimple_build_assign (var, COND_EXPR, cond,
2387 fold_build2 (MINUS_EXPR, itype, oprnd1,
2388 build_int_cst (itype, 1)),
2389 build_int_cst (itype, 0));
2390 new_pattern_def_seq (stmt_vinfo, def_stmt);
2391 var = vect_recog_temp_ssa_var (itype, NULL);
2392 def_stmt
2393 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2394 gimple_assign_lhs (def_stmt));
2395 append_pattern_def_seq (stmt_vinfo, def_stmt);
2397 shift = build_int_cst (itype, tree_log2 (oprnd1));
2398 pattern_stmt
2399 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2400 RSHIFT_EXPR, var, shift);
2402 else
2404 tree signmask;
2405 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2406 if (compare_tree_int (oprnd1, 2) == 0)
2408 signmask = vect_recog_temp_ssa_var (itype, NULL);
2409 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2410 build_int_cst (itype, 1),
2411 build_int_cst (itype, 0));
2412 append_pattern_def_seq (stmt_vinfo, def_stmt);
2414 else
2416 tree utype
2417 = build_nonstandard_integer_type (prec, 1);
2418 tree vecutype = get_vectype_for_scalar_type (utype);
2419 tree shift
2420 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2421 - tree_log2 (oprnd1));
2422 tree var = vect_recog_temp_ssa_var (utype, NULL);
2424 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2425 build_int_cst (utype, -1),
2426 build_int_cst (utype, 0));
2427 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2428 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2429 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2430 append_pattern_def_seq (stmt_vinfo, def_stmt);
2431 var = vect_recog_temp_ssa_var (utype, NULL);
2432 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2433 gimple_assign_lhs (def_stmt),
2434 shift);
2435 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2436 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2437 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2438 append_pattern_def_seq (stmt_vinfo, def_stmt);
2439 signmask = vect_recog_temp_ssa_var (itype, NULL);
2440 def_stmt
2441 = gimple_build_assign (signmask, NOP_EXPR, var);
2442 append_pattern_def_seq (stmt_vinfo, def_stmt);
2444 def_stmt
2445 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2446 PLUS_EXPR, oprnd0, signmask);
2447 append_pattern_def_seq (stmt_vinfo, def_stmt);
2448 def_stmt
2449 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2450 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2451 fold_build2 (MINUS_EXPR, itype, oprnd1,
2452 build_int_cst (itype, 1)));
2453 append_pattern_def_seq (stmt_vinfo, def_stmt);
2455 pattern_stmt
2456 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2457 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2458 signmask);
2461 if (dump_enabled_p ())
2462 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2463 0);
2465 stmts->safe_push (last_stmt);
2467 *type_in = vectype;
2468 *type_out = vectype;
2469 return pattern_stmt;
2472 if (prec > HOST_BITS_PER_WIDE_INT
2473 || integer_zerop (oprnd1))
2474 return NULL;
2476 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2477 return NULL;
2479 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2481 if (TYPE_UNSIGNED (itype))
2483 unsigned HOST_WIDE_INT mh, ml;
2484 int pre_shift, post_shift;
2485 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2486 & GET_MODE_MASK (TYPE_MODE (itype)));
2487 tree t1, t2, t3, t4;
2489 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2490 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2491 return NULL;
2493 /* Find a suitable multiplier and right shift count
2494 instead of multiplying with D. */
2495 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2497 /* If the suggested multiplier is more than SIZE bits, we can do better
2498 for even divisors, using an initial right shift. */
2499 if (mh != 0 && (d & 1) == 0)
2501 pre_shift = floor_log2 (d & -d);
2502 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2503 &ml, &post_shift, &dummy_int);
2504 gcc_assert (!mh);
2506 else
2507 pre_shift = 0;
2509 if (mh != 0)
2511 if (post_shift - 1 >= prec)
2512 return NULL;
2514 /* t1 = oprnd0 h* ml;
2515 t2 = oprnd0 - t1;
2516 t3 = t2 >> 1;
2517 t4 = t1 + t3;
2518 q = t4 >> (post_shift - 1); */
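	  /* Illustrative values (an assumption, per the tables in
	     Hacker's Delight): for d == 7 and prec == 32,
	     choose_multiplier yields ml == 0x24924925 with
	     post_shift == 3, so with N standing for oprnd0 the plan
	     above computes, in C terms:  */
#if 0
	  unsigned int t1
	    = (unsigned int) (((unsigned long long) N * 0x24924925u) >> 32);
	  unsigned int q = (t1 + ((N - t1) >> 1)) >> (3 - 1);	/* N / 7 */
#endif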
2519 t1 = vect_recog_temp_ssa_var (itype, NULL);
2520 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2521 build_int_cst (itype, ml));
2522 append_pattern_def_seq (stmt_vinfo, def_stmt);
2524 t2 = vect_recog_temp_ssa_var (itype, NULL);
2525 def_stmt
2526 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2527 append_pattern_def_seq (stmt_vinfo, def_stmt);
2529 t3 = vect_recog_temp_ssa_var (itype, NULL);
2530 def_stmt
2531 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2532 append_pattern_def_seq (stmt_vinfo, def_stmt);
2534 t4 = vect_recog_temp_ssa_var (itype, NULL);
2535 def_stmt
2536 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2538 if (post_shift != 1)
2540 append_pattern_def_seq (stmt_vinfo, def_stmt);
2542 q = vect_recog_temp_ssa_var (itype, NULL);
2543 pattern_stmt
2544 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2545 build_int_cst (itype, post_shift - 1));
2547 else
2549 q = t4;
2550 pattern_stmt = def_stmt;
2553 else
2555 if (pre_shift >= prec || post_shift >= prec)
2556 return NULL;
2558 /* t1 = oprnd0 >> pre_shift;
2559 t2 = t1 h* ml;
2560 q = t2 >> post_shift; */
2561 if (pre_shift)
2563 t1 = vect_recog_temp_ssa_var (itype, NULL);
2564 def_stmt
2565 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2566 build_int_cst (NULL, pre_shift));
2567 append_pattern_def_seq (stmt_vinfo, def_stmt);
2569 else
2570 t1 = oprnd0;
2572 t2 = vect_recog_temp_ssa_var (itype, NULL);
2573 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2574 build_int_cst (itype, ml));
2576 if (post_shift)
2578 append_pattern_def_seq (stmt_vinfo, def_stmt);
2580 q = vect_recog_temp_ssa_var (itype, NULL);
2581 def_stmt
2582 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2583 build_int_cst (itype, post_shift));
2585 else
2586 q = t2;
2588 pattern_stmt = def_stmt;
2591 else
2593 unsigned HOST_WIDE_INT ml;
2594 int post_shift;
2595 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2596 unsigned HOST_WIDE_INT abs_d;
2597 bool add = false;
2598 tree t1, t2, t3, t4;
2600 /* Give up for -1. */
2601 if (d == -1)
2602 return NULL;
2604 /* Since d might be INT_MIN, we have to cast to
2605 unsigned HOST_WIDE_INT before negating to avoid
2606 undefined signed overflow. */
2607 abs_d = (d >= 0
2608 ? (unsigned HOST_WIDE_INT) d
2609 : - (unsigned HOST_WIDE_INT) d);
2611 /* n rem d = n rem -d */
2612 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2614 d = abs_d;
2615 oprnd1 = build_int_cst (itype, abs_d);
2617 else if (HOST_BITS_PER_WIDE_INT >= prec
2618 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2619 /* This case is not handled correctly below. */
2620 return NULL;
2622 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2623 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2625 add = true;
2626 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2628 if (post_shift >= prec)
2629 return NULL;
2631 /* t1 = oprnd0 h* ml; */
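      /* Illustrative values (an assumption, from choose_multiplier):
	 for d == 3 and prec == 32 this yields ml == 0x55555556 with
	 post_shift == 0 and add == false, so the final quotient below
	 becomes (oprnd0 h* 0x55555556) - (oprnd0 >> 31).  */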
2632 t1 = vect_recog_temp_ssa_var (itype, NULL);
2633 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2634 build_int_cst (itype, ml));
2636 if (add)
2638 /* t2 = t1 + oprnd0; */
2639 append_pattern_def_seq (stmt_vinfo, def_stmt);
2640 t2 = vect_recog_temp_ssa_var (itype, NULL);
2641 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2643 else
2644 t2 = t1;
2646 if (post_shift)
2648 /* t3 = t2 >> post_shift; */
2649 append_pattern_def_seq (stmt_vinfo, def_stmt);
2650 t3 = vect_recog_temp_ssa_var (itype, NULL);
2651 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2652 build_int_cst (itype, post_shift));
2654 else
2655 t3 = t2;
2657 wide_int oprnd0_min, oprnd0_max;
2658 int msb = 1;
2659 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2661 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2662 msb = 0;
2663 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2664 msb = -1;
2667 if (msb == 0 && d >= 0)
2669 /* q = t3; */
2670 q = t3;
2671 pattern_stmt = def_stmt;
2673 else
2675 /* t4 = oprnd0 >> (prec - 1);
2676 or if we know from VRP that oprnd0 >= 0
2677 t4 = 0;
2678 or if we know from VRP that oprnd0 < 0
2679 t4 = -1; */
2680 append_pattern_def_seq (stmt_vinfo, def_stmt);
2681 t4 = vect_recog_temp_ssa_var (itype, NULL);
2682 if (msb != 1)
2683 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2684 build_int_cst (itype, msb));
2685 else
2686 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2687 build_int_cst (itype, prec - 1));
2688 append_pattern_def_seq (stmt_vinfo, def_stmt);
2690 /* q = t3 - t4; or q = t4 - t3; */
2691 q = vect_recog_temp_ssa_var (itype, NULL);
2692 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2693 d < 0 ? t3 : t4);
2697 if (rhs_code == TRUNC_MOD_EXPR)
2699 tree r, t1;
2701 /* We divided. Now finish by:
2702 t1 = q * oprnd1;
2703 r = oprnd0 - t1; */
2704 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2706 t1 = vect_recog_temp_ssa_var (itype, NULL);
2707 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2708 append_pattern_def_seq (stmt_vinfo, def_stmt);
2710 r = vect_recog_temp_ssa_var (itype, NULL);
2711 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2714 /* Pattern detected. */
2715 if (dump_enabled_p ())
2717 dump_printf_loc (MSG_NOTE, vect_location,
2718 "vect_recog_divmod_pattern: detected: ");
2719 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2720 dump_printf (MSG_NOTE, "\n");
2723 stmts->safe_push (last_stmt);
2725 *type_in = vectype;
2726 *type_out = vectype;
2727 return pattern_stmt;
2730 /* Function vect_recog_mixed_size_cond_pattern
2732 Try to find the following pattern:
2734 type x_t, y_t;
2735 TYPE a_T, b_T, c_T;
2736 loop:
2737 S1 a_T = x_t CMP y_t ? b_T : c_T;
2739 where type 'TYPE' is an integral type which has a different size
2740 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2741 than 'type', the constants need to fit into an integer type
2742 with the same width as 'type') or results of conversion from 'type'.
2744 Input:
2746 * LAST_STMT: A stmt from which the pattern search begins.
2748 Output:
2750 * TYPE_IN: The type of the input arguments to the pattern.
2752 * TYPE_OUT: The type of the output of this pattern.
2754 * Return value: A new stmt that will be used to replace the pattern.
2755 Additionally a def_stmt is added.
2757 a_it = x_t CMP y_t ? b_it : c_it;
2758 a_T = (TYPE) a_it; */
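/* An illustrative C sketch (exposition only, assuming type ==
   signed char and TYPE == int): the COND_EXPR is kept at the
   comparison's width and only then widened.  */
#if 0
static int
mixed_size_cond_example (signed char x_t, signed char y_t)
{
  signed char a_it = x_t < y_t ? 3 : 5;	/* same-size COND_EXPR */
  return (int) a_it;			/* a_T = (TYPE) a_it */
}
#endif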
2760 static gimple *
2761 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2762 tree *type_out)
2764 gimple *last_stmt = (*stmts)[0];
2765 tree cond_expr, then_clause, else_clause;
2766 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2767 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2768 gimple *pattern_stmt, *def_stmt;
2769 vec_info *vinfo = stmt_vinfo->vinfo;
2770 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2771 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2772 bool promotion;
2773 tree comp_scalar_type;
2775 if (!is_gimple_assign (last_stmt)
2776 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2777 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2778 return NULL;
2780 cond_expr = gimple_assign_rhs1 (last_stmt);
2781 then_clause = gimple_assign_rhs2 (last_stmt);
2782 else_clause = gimple_assign_rhs3 (last_stmt);
2784 if (!COMPARISON_CLASS_P (cond_expr))
2785 return NULL;
2787 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2788 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2789 if (comp_vectype == NULL_TREE)
2790 return NULL;
2792 type = gimple_expr_type (last_stmt);
2793 if (types_compatible_p (type, comp_scalar_type)
2794 || ((TREE_CODE (then_clause) != INTEGER_CST
2795 || TREE_CODE (else_clause) != INTEGER_CST)
2796 && !INTEGRAL_TYPE_P (comp_scalar_type))
2797 || !INTEGRAL_TYPE_P (type))
2798 return NULL;
2800 if ((TREE_CODE (then_clause) != INTEGER_CST
2801 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2802 &def_stmt0, &promotion))
2803 || (TREE_CODE (else_clause) != INTEGER_CST
2804 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2805 &def_stmt1, &promotion)))
2806 return NULL;
2808 if (orig_type0 && orig_type1
2809 && !types_compatible_p (orig_type0, orig_type1))
2810 return NULL;
2812 if (orig_type0)
2814 if (!types_compatible_p (orig_type0, comp_scalar_type))
2815 return NULL;
2816 then_clause = gimple_assign_rhs1 (def_stmt0);
2817 itype = orig_type0;
2820 if (orig_type1)
2822 if (!types_compatible_p (orig_type1, comp_scalar_type))
2823 return NULL;
2824 else_clause = gimple_assign_rhs1 (def_stmt1);
2825 itype = orig_type1;
2829 HOST_WIDE_INT cmp_mode_size
2830 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2832 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2833 return NULL;
2835 vectype = get_vectype_for_scalar_type (type);
2836 if (vectype == NULL_TREE)
2837 return NULL;
2839 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2840 return NULL;
2842 if (itype == NULL_TREE)
2843 itype = build_nonstandard_integer_type (cmp_mode_size,
2844 TYPE_UNSIGNED (type));
2846 if (itype == NULL_TREE
2847 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2848 return NULL;
2850 vecitype = get_vectype_for_scalar_type (itype);
2851 if (vecitype == NULL_TREE)
2852 return NULL;
2854 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2855 return NULL;
2857 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2859 if ((TREE_CODE (then_clause) == INTEGER_CST
2860 && !int_fits_type_p (then_clause, itype))
2861 || (TREE_CODE (else_clause) == INTEGER_CST
2862 && !int_fits_type_p (else_clause, itype)))
2863 return NULL;
2866 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2867 COND_EXPR, unshare_expr (cond_expr),
2868 fold_convert (itype, then_clause),
2869 fold_convert (itype, else_clause));
2870 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2871 NOP_EXPR, gimple_assign_lhs (def_stmt));
2873 new_pattern_def_seq (stmt_vinfo, def_stmt);
2874 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2875 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2876 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2877 *type_in = vecitype;
2878 *type_out = vectype;
2880 if (dump_enabled_p ())
2881 dump_printf_loc (MSG_NOTE, vect_location,
2882 "vect_recog_mixed_size_cond_pattern: detected:\n");
2884 return pattern_stmt;
2888 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2889 true if bool VAR can and should be optimized that way. Assume it shouldn't
2890 in case it's a result of a comparison which can be directly vectorized into
2891 a vector comparison. */
2893 static bool
2894 check_bool_pattern (tree var, vec_info *vinfo)
2896 gimple *def_stmt;
2897 enum vect_def_type dt;
2898 tree rhs1;
2899 enum tree_code rhs_code;
2901 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2902 return false;
2904 if (dt != vect_internal_def)
2905 return false;
2907 if (!is_gimple_assign (def_stmt))
2908 return false;
2910 if (!has_single_use (var))
2911 return false;
2913 rhs1 = gimple_assign_rhs1 (def_stmt);
2914 rhs_code = gimple_assign_rhs_code (def_stmt);
2915 switch (rhs_code)
2917 case SSA_NAME:
2918 return check_bool_pattern (rhs1, vinfo);
2920 CASE_CONVERT:
2921 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2922 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2923 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2924 return false;
2925 return check_bool_pattern (rhs1, vinfo);
2927 case BIT_NOT_EXPR:
2928 return check_bool_pattern (rhs1, vinfo);
2930 case BIT_AND_EXPR:
2931 case BIT_IOR_EXPR:
2932 case BIT_XOR_EXPR:
2933 if (!check_bool_pattern (rhs1, vinfo))
2934 return false;
2935 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2937 default:
2938 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2940 tree vecitype, comp_vectype, mask_type;
2942 /* If the comparison can throw, then is_gimple_condexpr will be
2943 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2944 if (stmt_could_throw_p (def_stmt))
2945 return false;
2947 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2948 if (comp_vectype == NULL_TREE)
2949 return false;
2951 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2952 if (mask_type
2953 && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2954 return false;
2956 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2958 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2959 tree itype
2960 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2961 vecitype = get_vectype_for_scalar_type (itype);
2962 if (vecitype == NULL_TREE)
2963 return false;
2965 else
2966 vecitype = comp_vectype;
2967 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2969 return false;
2974 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2975 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2976 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2978 static tree
2979 adjust_bool_pattern_cast (tree type, tree var)
2981 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2982 gimple *cast_stmt, *pattern_stmt;
2984 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2985 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2986 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2987 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2988 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2989 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2990 return gimple_assign_lhs (cast_stmt);
2994 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2995 recursively. VAR is an SSA_NAME that should be transformed from bool
2996 to a wider integer type, OUT_TYPE is the desired final integer type of
2997 the whole pattern, TRUEVAL should be NULL unless optimizing
2998 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2999 in the then_clause, STMTS is where statements with added pattern stmts
3000 should be pushed to. */
3002 static tree
3003 adjust_bool_pattern (tree var, tree out_type, tree trueval,
3004 vec<gimple *> *stmts)
3006 gimple *stmt = SSA_NAME_DEF_STMT (var);
3007 enum tree_code rhs_code, def_rhs_code;
3008 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3009 location_t loc;
3010 gimple *pattern_stmt, *def_stmt;
3012 rhs1 = gimple_assign_rhs1 (stmt);
3013 rhs2 = gimple_assign_rhs2 (stmt);
3014 rhs_code = gimple_assign_rhs_code (stmt);
3015 loc = gimple_location (stmt);
3016 switch (rhs_code)
3018 case SSA_NAME:
3019 CASE_CONVERT:
3020 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3021 itype = TREE_TYPE (irhs1);
3022 pattern_stmt
3023 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3024 SSA_NAME, irhs1);
3025 break;
3027 case BIT_NOT_EXPR:
3028 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3029 itype = TREE_TYPE (irhs1);
3030 pattern_stmt
3031 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3032 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3033 break;
3035 case BIT_AND_EXPR:
3036 /* Try to optimize x = y & (a < b ? 1 : 0); into
3037 x = (a < b ? y : 0);
3039 E.g. for:
3040 bool a_b, b_b, c_b;
3041 TYPE d_T;
3043 S1 a_b = x1 CMP1 y1;
3044 S2 b_b = x2 CMP2 y2;
3045 S3 c_b = a_b & b_b;
3046 S4 d_T = (TYPE) c_b;
3048 we would normally emit:
3050 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3051 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3052 S3' c_T = a_T & b_T;
3053 S4' d_T = c_T;
3055 but we can save one stmt by using the
3056 result of one of the COND_EXPRs in the other COND_EXPR and leave
3057 BIT_AND_EXPR stmt out:
3059 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3060 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3061 S4' f_T = c_T;
3063 At least when VEC_COND_EXPR is implemented using masks,
3064 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3065 computes the comparison mask and ANDs it, in one case with
3066 an all-ones vector, in the other case with a vector register.
3067 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3068 often more expensive. */
3069 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3070 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3071 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3073 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3074 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3075 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3076 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3078 gimple *tstmt;
3079 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3080 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3081 tstmt = stmts->pop ();
3082 gcc_assert (tstmt == def_stmt);
3083 stmts->quick_push (stmt);
3084 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3085 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3086 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3087 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3088 return irhs2;
3090 else
3091 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3092 goto and_ior_xor;
3094 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3095 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3096 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3098 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3099 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3100 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3101 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3103 gimple *tstmt;
3104 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3105 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3106 tstmt = stmts->pop ();
3107 gcc_assert (tstmt == def_stmt);
3108 stmts->quick_push (stmt);
3109 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3110 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3111 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3112 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3113 return irhs1;
3115 else
3116 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3117 goto and_ior_xor;
3119 /* FALLTHRU */
3120 case BIT_IOR_EXPR:
3121 case BIT_XOR_EXPR:
3122 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3123 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3124 and_ior_xor:
3125 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3126 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3128 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3129 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3130 int out_prec = TYPE_PRECISION (out_type);
3131 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3132 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3133 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3134 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3135 else
3137 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3138 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3141 itype = TREE_TYPE (irhs1);
3142 pattern_stmt
3143 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3144 rhs_code, irhs1, irhs2);
3145 break;
3147 default:
3148 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3149 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3150 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3151 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3152 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3154 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3155 itype
3156 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3158 else
3159 itype = TREE_TYPE (rhs1);
3160 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3161 if (trueval == NULL_TREE)
3162 trueval = build_int_cst (itype, 1);
3163 else
3164 gcc_checking_assert (useless_type_conversion_p (itype,
3165 TREE_TYPE (trueval)));
3166 pattern_stmt
3167 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 COND_EXPR, cond_expr, trueval,
3169 build_int_cst (itype, 0));
3170 break;
3173 stmts->safe_push (stmt);
3174 gimple_set_location (pattern_stmt, loc);
3175 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3176 return gimple_assign_lhs (pattern_stmt);
3180 /* Helper for search_type_for_mask. */
3182 static tree
3183 search_type_for_mask_1 (tree var, vec_info *vinfo,
3184 hash_map<gimple *, tree> &cache)
3186 gimple *def_stmt;
3187 enum vect_def_type dt;
3188 tree rhs1;
3189 enum tree_code rhs_code;
3190 tree res = NULL_TREE, res2;
3192 if (TREE_CODE (var) != SSA_NAME)
3193 return NULL_TREE;
3195 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3196 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3197 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3198 return NULL_TREE;
3200 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3201 return NULL_TREE;
3203 if (dt != vect_internal_def)
3204 return NULL_TREE;
3206 if (!is_gimple_assign (def_stmt))
3207 return NULL_TREE;
3209 tree *c = cache.get (def_stmt);
3210 if (c)
3211 return *c;
3213 rhs_code = gimple_assign_rhs_code (def_stmt);
3214 rhs1 = gimple_assign_rhs1 (def_stmt);
3216 switch (rhs_code)
3218 case SSA_NAME:
3219 case BIT_NOT_EXPR:
3220 CASE_CONVERT:
3221 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3222 break;
3224 case BIT_AND_EXPR:
3225 case BIT_IOR_EXPR:
3226 case BIT_XOR_EXPR:
3227 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3228 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3229 cache);
3230 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3231 res = res2;
3232 break;
3234 default:
3235 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3237 tree comp_vectype, mask_type;
3239 if (TREE_CODE (TREE_TYPE (rhs1)) == BOOLEAN_TYPE)
3241 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3242 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3243 vinfo, cache);
3244 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3245 res = res2;
3246 break;
3249 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3250 if (comp_vectype == NULL_TREE)
3252 res = NULL_TREE;
3253 break;
3256 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3257 if (!mask_type
3258 || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
3260 res = NULL_TREE;
3261 break;
3264 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3265 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3267 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3268 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3270 else
3271 res = TREE_TYPE (rhs1);
3275 cache.put (def_stmt, res);
3276 return res;
3279 /* Return the proper type for converting bool VAR into
3280 an integer value, or NULL_TREE if no such type exists.
3281 The type is chosen so that the converted value has the
3282 same number of elements as VAR's vector type. */
3284 static tree
3285 search_type_for_mask (tree var, vec_info *vinfo)
3287 hash_map<gimple *, tree> cache;
3288 return search_type_for_mask_1 (var, vinfo, cache);
3291 /* Function vect_recog_bool_pattern
3293 Try to find pattern like following:
3295 bool a_b, b_b, c_b, d_b, e_b;
3296 TYPE f_T;
3297 loop:
3298 S1 a_b = x1 CMP1 y1;
3299 S2 b_b = x2 CMP2 y2;
3300 S3 c_b = a_b & b_b;
3301 S4 d_b = x3 CMP3 y3;
3302 S5 e_b = c_b | d_b;
3303 S6 f_T = (TYPE) e_b;
3305 where type 'TYPE' is an integral type. Or a similar pattern
3306 ending in
3308 S6 f_Y = e_b ? r_Y : s_Y;
3310 as produced by if-conversion of a complex condition.
3312 Input:
3314 * LAST_STMT: A stmt at the end from which the pattern
3315 search begins, i.e. cast of a bool to
3316 an integer type.
3318 Output:
3320 * TYPE_IN: The type of the input arguments to the pattern.
3322 * TYPE_OUT: The type of the output of this pattern.
3324 * Return value: A new stmt that will be used to replace the pattern.
3326 Assuming the size of TYPE is the same as the size of all comparisons
3327 (otherwise some casts would be added where needed), for the above
3328 sequence we create related pattern stmts:
3329 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3330 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3331 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3332 S5' e_T = c_T | d_T;
3333 S6' f_T = e_T;
3335 Instead of the above S3' we could emit:
3336 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3337 S3' c_T = a_T | b_T;
3338 but the above is more efficient. */
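/* An illustrative C sketch (exposition only) of the S1'-S6' rewrite
   above, specialized to two comparisons combined with '&':  */
#if 0
static int
bool_pattern_example (int x1, int y1, int x2, int y2)
{
  int a_T = x1 < y1 ? 1 : 0;	/* S1' */
  int c_T = x2 < y2 ? a_T : 0;	/* S3': the '&' is folded away */
  return c_T;			/* S6': f_T = c_T */
}
#endif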
3340 static gimple *
3341 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3342 tree *type_out)
3344 gimple *last_stmt = stmts->pop ();
3345 enum tree_code rhs_code;
3346 tree var, lhs, rhs, vectype;
3347 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3348 stmt_vec_info new_stmt_info;
3349 vec_info *vinfo = stmt_vinfo->vinfo;
3350 gimple *pattern_stmt;
3352 if (!is_gimple_assign (last_stmt))
3353 return NULL;
3355 var = gimple_assign_rhs1 (last_stmt);
3356 lhs = gimple_assign_lhs (last_stmt);
3358 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3359 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3360 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3361 return NULL;
3363 rhs_code = gimple_assign_rhs_code (last_stmt);
3364 if (CONVERT_EXPR_CODE_P (rhs_code))
3366 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3367 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3368 return NULL;
3369 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3370 if (vectype == NULL_TREE)
3371 return NULL;
3373 if (check_bool_pattern (var, vinfo))
3375 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3376 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3377 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3378 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3379 else
3380 pattern_stmt
3381 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3383 else
3385 tree type = search_type_for_mask (var, vinfo);
3386 tree cst0, cst1, tmp;
3388 if (!type)
3389 return NULL;
3391 /* We may directly use the cond with a narrowed type to avoid
3392 multiple cond exprs with subsequent result packing, and
3393 perform a single cond with a packed mask instead. In case
3394 of widening it is better to make the cond first and then
3395 extract the results. */
3396 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3397 type = TREE_TYPE (lhs);
3399 cst0 = build_int_cst (type, 0);
3400 cst1 = build_int_cst (type, 1);
3401 tmp = vect_recog_temp_ssa_var (type, NULL);
3402 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3404 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3406 tree new_vectype = get_vectype_for_scalar_type (type);
3407 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3408 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3409 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3410 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3412 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3413 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3417 *type_out = vectype;
3418 *type_in = vectype;
3419 stmts->safe_push (last_stmt);
3420 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_NOTE, vect_location,
3422 "vect_recog_bool_pattern: detected:\n");
3424 return pattern_stmt;
3426 else if (rhs_code == COND_EXPR
3427 && TREE_CODE (var) == SSA_NAME)
3429 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3430 if (vectype == NULL_TREE)
3431 return NULL;
3433 /* Build a scalar type for the boolean result that when
3434 vectorized matches the vector type of the result in
3435 size and number of elements. */
3436 unsigned prec
3437 = wi::udiv_trunc (TYPE_SIZE (vectype),
3438 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
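      /* For example, with a V4SImode VECTYPE this gives
	 prec == 128 / 4 == 32, i.e. a 32-bit scalar stand-in for the
	 boolean result.  */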
3439 tree type
3440 = build_nonstandard_integer_type (prec,
3441 TYPE_UNSIGNED (TREE_TYPE (var)));
3442 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3443 return NULL;
3445 if (!check_bool_pattern (var, vinfo))
3446 return NULL;
3448 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3450 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3451 pattern_stmt
3452 = gimple_build_assign (lhs, COND_EXPR,
3453 build2 (NE_EXPR, boolean_type_node,
3454 rhs, build_int_cst (type, 0)),
3455 gimple_assign_rhs2 (last_stmt),
3456 gimple_assign_rhs3 (last_stmt));
3457 *type_out = vectype;
3458 *type_in = vectype;
3459 stmts->safe_push (last_stmt);
3460 if (dump_enabled_p ())
3461 dump_printf_loc (MSG_NOTE, vect_location,
3462 "vect_recog_bool_pattern: detected:\n");
3464 return pattern_stmt;
3466 else if (rhs_code == SSA_NAME
3467 && STMT_VINFO_DATA_REF (stmt_vinfo))
3469 stmt_vec_info pattern_stmt_info;
3470 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3471 gcc_assert (vectype != NULL_TREE);
3472 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3473 return NULL;
3475 if (check_bool_pattern (var, vinfo))
3476 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
3477 NULL_TREE, stmts);
3478 else
3480 tree type = search_type_for_mask (var, vinfo);
3481 tree cst0, cst1, new_vectype;
3483 if (!type)
3484 return NULL;
3486 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3487 type = TREE_TYPE (vectype);
3489 cst0 = build_int_cst (type, 0);
3490 cst1 = build_int_cst (type, 1);
3491 new_vectype = get_vectype_for_scalar_type (type);
3493 rhs = vect_recog_temp_ssa_var (type, NULL);
3494 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3496 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3497 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3498 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3499 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3502 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3503 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3505 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3506 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3507 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3508 rhs = rhs2;
3510 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3511 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3512 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3513 STMT_VINFO_DATA_REF (pattern_stmt_info)
3514 = STMT_VINFO_DATA_REF (stmt_vinfo);
3515 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3516 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3517 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3518 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3519 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3520 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3521 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3522 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3523 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3524 *type_out = vectype;
3525 *type_in = vectype;
3526 stmts->safe_push (last_stmt);
3527 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_NOTE, vect_location,
3529 "vect_recog_bool_pattern: detected:\n");
3530 return pattern_stmt;
3532 else
3533 return NULL;
3537 /* A helper for vect_recog_mask_conversion_pattern. Build
3538 conversion of MASK to a type suitable for masking VECTYPE.
3539 The built statement gets the required vectype and is appended
3540 to the pattern def sequence of STMT_VINFO.
3542 Return converted mask. */
3544 static tree
3545 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3546 vec_info *vinfo)
3548 gimple *stmt;
3549 tree masktype, tmp;
3550 stmt_vec_info new_stmt_info;
3552 masktype = build_same_sized_truth_vector_type (vectype);
3553 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3554 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3555 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3556 set_vinfo_for_stmt (stmt, new_stmt_info);
3557 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3558 append_pattern_def_seq (stmt_vinfo, stmt);
3560 return tmp;
3564 /* Function vect_recog_mask_conversion_pattern
3566 Try to find statements which require boolean type
3567 converison. Additional conversion statements are
3568 added to handle such cases. For example:
3570 bool m_1, m_2, m_3;
3571 int i_4, i_5;
3572 double d_6, d_7;
3573 char c_1, c_2, c_3;
3575 S1 m_1 = i_4 > i_5;
3576 S2 m_2 = d_6 < d_7;
3577 S3 m_3 = m_1 & m_2;
3578 S4 c_1 = m_3 ? c_2 : c_3;
3580 Will be transformed into:
3582 S1 m_1 = i_4 > i_5;
3583 S2 m_2 = d_6 < d_7;
3584 S3'' m_2' = (_Bool[bitsize=32])m_2
3585 S3' m_3' = m_1 & m_2';
3586 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3587 S4' c_1' = m_3'' ? c_2 : c_3; */
3589 static gimple *
3590 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3591 tree *type_out)
3593 gimple *last_stmt = stmts->pop ();
3594 enum tree_code rhs_code;
3595 tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
3596 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3597 stmt_vec_info pattern_stmt_info;
3598 vec_info *vinfo = stmt_vinfo->vinfo;
3599 gimple *pattern_stmt;
3601 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3602 if (is_gimple_call (last_stmt)
3603 && gimple_call_internal_p (last_stmt)
3604 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3605 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3607 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3609 if (load)
3611 lhs = gimple_call_lhs (last_stmt);
3612 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3614 else
3616 rhs2 = gimple_call_arg (last_stmt, 3);
3617 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3620 rhs1 = gimple_call_arg (last_stmt, 2);
3621 rhs1_type = search_type_for_mask (rhs1, vinfo);
3622 if (!rhs1_type)
3623 return NULL;
3624 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3626 if (!vectype1 || !vectype2
3627 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3628 return NULL;
3630 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3632 if (load)
3634 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3635 pattern_stmt
3636 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3637 gimple_call_arg (last_stmt, 0),
3638 gimple_call_arg (last_stmt, 1),
3639 tmp);
3640 gimple_call_set_lhs (pattern_stmt, lhs);
3642 else
3643 pattern_stmt
3644 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3645 gimple_call_arg (last_stmt, 0),
3646 gimple_call_arg (last_stmt, 1),
3647 tmp,
3648 gimple_call_arg (last_stmt, 3));
3651 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3652 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3653 STMT_VINFO_DATA_REF (pattern_stmt_info)
3654 = STMT_VINFO_DATA_REF (stmt_vinfo);
3655 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3656 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3657 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3658 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3659 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3660 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3661 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3662 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3663 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3665 *type_out = vectype1;
3666 *type_in = vectype1;
3667 stmts->safe_push (last_stmt);
3668 if (dump_enabled_p ())
3669 dump_printf_loc (MSG_NOTE, vect_location,
3670 "vect_recog_mask_conversion_pattern: detected:\n");
3672 return pattern_stmt;
3675 if (!is_gimple_assign (last_stmt))
3676 return NULL;
3678 lhs = gimple_assign_lhs (last_stmt);
3679 rhs1 = gimple_assign_rhs1 (last_stmt);
3680 rhs_code = gimple_assign_rhs_code (last_stmt);
3682 /* Check for cond expression requiring mask conversion. */
3683 if (rhs_code == COND_EXPR)
3685 /* vect_recog_mixed_size_cond_pattern could apply.
3686 Do nothing then. */
3687 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3688 return NULL;
3690 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3692 if (TREE_CODE (rhs1) == SSA_NAME)
3694 rhs1_type = search_type_for_mask (rhs1, vinfo);
3695 if (!rhs1_type)
3696 return NULL;
3698 else if (COMPARISON_CLASS_P (rhs1))
3699 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3700 else
3701 return NULL;
3703 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3705 if (!vectype1 || !vectype2
3706 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3707 return NULL;
3709 /* If rhs1 is a comparison we need to move it into a
3710 separate statement. */
3711 if (TREE_CODE (rhs1) != SSA_NAME)
3713 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3714 pattern_stmt = gimple_build_assign (tmp, rhs1);
3715 rhs1 = tmp;
3717 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3718 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3719 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3720 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3723 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3725 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3726 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3727 gimple_assign_rhs2 (last_stmt),
3728 gimple_assign_rhs3 (last_stmt));
3730 *type_out = vectype1;
3731 *type_in = vectype1;
3732 stmts->safe_push (last_stmt);
3733 if (dump_enabled_p ())
3734 dump_printf_loc (MSG_NOTE, vect_location,
3735 "vect_recog_mask_conversion_pattern: detected:\n");
3737 return pattern_stmt;
3738 }
3740 /* Now check for binary boolean operations requiring conversion of
3741 one of the operands. */
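/* Illustrative example (an editor's sketch, not from the original
   sources): an if-converted condition such as

     if (c[i] > 0 && a[i] < 1.0)

   with C an array of int and A an array of double yields a
   BIT_AND_EXPR of two masks computed from comparisons of different
   scalar precisions; the code below keeps the mask type of the
   narrower-precision operand and converts the other operand to it.  */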
3742 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
3743 return NULL;
3745 if (rhs_code != BIT_IOR_EXPR
3746 && rhs_code != BIT_XOR_EXPR
3747 && rhs_code != BIT_AND_EXPR)
3748 return NULL;
3750 rhs2 = gimple_assign_rhs2 (last_stmt);
3752 rhs1_type = search_type_for_mask (rhs1, vinfo);
3753 rhs2_type = search_type_for_mask (rhs2, vinfo);
3755 if (!rhs1_type || !rhs2_type
3756 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3757 return NULL;
3759 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3760 {
3761 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3762 if (!vectype1)
3763 return NULL;
3764 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3765 }
3766 else
3767 {
3768 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3769 if (!vectype1)
3770 return NULL;
3771 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3772 }
3774 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3775 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
3777 *type_out = vectype1;
3778 *type_in = vectype1;
3779 stmts->safe_push (last_stmt);
3780 if (dump_enabled_p ())
3781 dump_printf_loc (MSG_NOTE, vect_location,
3782 "vect_recog_mask_conversion_pattern: detected:\n");
3784 return pattern_stmt;
3785 }
3788 /* Mark statements that are involved in a pattern. */
3790 static inline void
3791 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
3792 tree pattern_vectype)
3793 {
3794 stmt_vec_info pattern_stmt_info, def_stmt_info;
3795 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3796 vec_info *vinfo = orig_stmt_info->vinfo;
3797 gimple *def_stmt;
3799 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3800 if (pattern_stmt_info == NULL)
3801 {
3802 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3803 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3804 }
3805 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3807 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3808 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3809 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3810 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3811 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3812 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3813 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3814 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3815 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3816 {
3817 gimple_stmt_iterator si;
3818 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3819 !gsi_end_p (si); gsi_next (&si))
3820 {
3821 def_stmt = gsi_stmt (si);
3822 def_stmt_info = vinfo_for_stmt (def_stmt);
3823 if (def_stmt_info == NULL)
3824 {
3825 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3826 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3827 }
3828 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3829 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3830 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3831 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3832 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3833 }
3834 }
3835 }
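/* For example (an editor's note, not from the original sources), a
   mask conversion statement that build_mask_conversion appended to the
   pattern def sequence is visited by the loop above: it is placed in
   the original statement's basic block, marked vect_internal_def, and
   given the pattern vector type if it does not have one already.  */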
3837 /* Function vect_pattern_recog_1
3839 Input:
3840 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3841 computation pattern.
3842 SI: An iterator pointing to the stmt from which the pattern search should start.
3844 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
3845 expression that computes the same functionality and can be used to
3846 replace the sequence of stmts that are involved in the pattern.
3848 Output:
3849 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3850 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3851 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3852 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3853 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3854 the available target pattern.
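   For instance (an editor's illustration, not in the original
   comment): with TYPE_IN being the scalar type int on an SSE2 target,
   get_vectype_for_scalar_type maps it to V4SI, and for an assignment
   pattern statement using PLUS_EXPR the optab query below would check
   add_optab's handler for V4SImode.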
3856 This function also does some bookkeeping, as explained in the documentation
3857 for vect_pattern_recog. */
3859 static bool
3860 vect_pattern_recog_1 (vect_recog_func *recog_func,
3861 gimple_stmt_iterator si,
3862 vec<gimple *> *stmts_to_replace)
3863 {
3864 gimple *stmt = gsi_stmt (si), *pattern_stmt;
3865 stmt_vec_info stmt_info;
3866 loop_vec_info loop_vinfo;
3867 tree pattern_vectype;
3868 tree type_in, type_out;
3869 enum tree_code code;
3870 int i;
3871 gimple *next;
3873 stmts_to_replace->truncate (0);
3874 stmts_to_replace->quick_push (stmt);
3875 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
3876 if (!pattern_stmt)
3877 return false;
3879 stmt = stmts_to_replace->last ();
3880 stmt_info = vinfo_for_stmt (stmt);
3881 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3883 if (VECTOR_BOOLEAN_TYPE_P (type_in)
3884 || VECTOR_MODE_P (TYPE_MODE (type_in)))
3885 {
3886 /* No need to check target support (already checked by the pattern
3887 recognition function). */
3888 pattern_vectype = type_out ? type_out : type_in;
3889 }
3890 else
3891 {
3892 machine_mode vec_mode;
3893 enum insn_code icode;
3894 optab optab;
3896 /* Check target support */
3897 type_in = get_vectype_for_scalar_type (type_in);
3898 if (!type_in)
3899 return false;
3900 if (type_out)
3901 type_out = get_vectype_for_scalar_type (type_out);
3902 else
3903 type_out = type_in;
3904 if (!type_out)
3905 return false;
3906 pattern_vectype = type_out;
3908 if (is_gimple_assign (pattern_stmt))
3909 code = gimple_assign_rhs_code (pattern_stmt);
3910 else
3911 {
3912 gcc_assert (is_gimple_call (pattern_stmt));
3913 code = CALL_EXPR;
3914 }
3916 optab = optab_for_tree_code (code, type_in, optab_default);
3917 vec_mode = TYPE_MODE (type_in);
3918 if (!optab
3919 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3920 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3921 return false;
3922 }
3924 /* Found a vectorizable pattern. */
3925 if (dump_enabled_p ())
3926 {
3927 dump_printf_loc (MSG_NOTE, vect_location,
3928 "%s pattern recognized: ", recog_func->name);
3929 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3930 }
3932 /* Mark the stmts that are involved in the pattern. */
3933 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3935 /* Patterns cannot be vectorized using SLP, because they change the order of
3936 computation. */
3937 if (loop_vinfo)
3938 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3939 if (next == stmt)
3940 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3942 /* It is possible that additional pattern stmts are created and inserted in
3943 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3944 relevant statements. */
3945 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3946 && (unsigned) i < (stmts_to_replace->length () - 1);
3947 i++)
3948 {
3949 stmt_info = vinfo_for_stmt (stmt);
3950 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3951 if (dump_enabled_p ())
3952 {
3953 dump_printf_loc (MSG_NOTE, vect_location,
3954 "additional pattern stmt: ");
3955 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3956 }
3958 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3959 }
3961 return true;
3962 }
3965 /* Function vect_pattern_recog
3967 Input:
3968 VINFO - a vec_info (for a loop or a basic-block region) in which we want
3969 to look for computation idioms.
3971 Output - for each computation idiom that is detected we create a new stmt
3972 that provides the same functionality and that can be vectorized. We
3973 also record some information in the struct_stmt_info of the relevant
3974 stmts, as explained below:
3976 At the entry to this function we have the following stmts, with the
3977 following initial value in the STMT_VINFO fields:
3979 stmt in_pattern_p related_stmt vec_stmt
3980 S1: a_i = .... - - -
3981 S2: a_2 = ..use(a_i).. - - -
3982 S3: a_1 = ..use(a_2).. - - -
3983 S4: a_0 = ..use(a_1).. - - -
3984 S5: ... = ..use(a_0).. - - -
3986 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3987 represented by a single stmt. We then:
3988 - create a new stmt S6 equivalent to the pattern (the stmt is not
3989 inserted into the code)
3990 - fill in the STMT_VINFO fields as follows:
3992 in_pattern_p related_stmt vec_stmt
3993 S1: a_i = .... - - -
3994 S2: a_2 = ..use(a_i).. - - -
3995 S3: a_1 = ..use(a_2).. - - -
3996 S4: a_0 = ..use(a_1).. true S6 -
3997 '---> S6: a_new = .... - S4 -
3998 S5: ... = ..use(a_0).. - - -
4000 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4001 to each other through the RELATED_STMT field).
4003 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4004 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4005 remain irrelevant unless used by stmts other than S4.
4007 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4008 (because they are marked as irrelevant). It will vectorize S6, and record
4009 a pointer to the new vector stmt VS6 from S6 (as usual).
4010 S4 will be skipped, and S5 will be vectorized as usual:
4012 in_pattern_p related_stmt vec_stmt
4013 S1: a_i = .... - - -
4014 S2: a_2 = ..use(a_i).. - - -
4015 S3: a_1 = ..use(a_2).. - - -
4016 > VS6: va_new = .... - - -
4017 S4: a_0 = ..use(a_1).. true S6 VS6
4018 '---> S6: a_new = .... - S4 VS6
4019 > VS5: ... = ..vuse(va_new).. - - -
4020 S5: ... = ..use(a_0).. - - -
4022 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4023 elsewhere), and we'll end up with:
4025 VS6: va_new = ....
4026 VS5: ... = ..vuse(va_new)..
4028 In case of more than one pattern statement, e.g., widen-mult with
4029 intermediate type:
4031 S1 a_t = ...;
4032 S2 a_T = (TYPE) a_t;
4033 '--> S3: a_it = (interm_type) a_t;
4034 S4 prod_T = a_T * CONST;
4035 '--> S5: prod_T' = a_it w* CONST;
4037 there may be other users of a_T outside the pattern. In that case S2 will
4038 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4039 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4040 be recorded in S3. */
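/* An illustrative source loop for the widen-mult case above (an
   editor's example, not part of the original comment):

     short *a; long long *prod;
     for (i = 0; i < n; i++)
       prod[i] = (long long) a[i] * 37;

   Here TYPE is long long, while an intermediate type such as int is
   wide enough to hold the product of two 16-bit values, so S5 can use
   a short->int widening multiply instead of a full 64-bit one.  */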
4042 void
4043 vect_pattern_recog (vec_info *vinfo)
4044 {
4045 struct loop *loop;
4046 basic_block *bbs;
4047 unsigned int nbbs;
4048 gimple_stmt_iterator si;
4049 unsigned int i, j;
4050 auto_vec<gimple *, 1> stmts_to_replace;
4051 gimple *stmt;
4053 if (dump_enabled_p ())
4054 dump_printf_loc (MSG_NOTE, vect_location,
4055 "=== vect_pattern_recog ===\n");
4057 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4058 {
4059 loop = LOOP_VINFO_LOOP (loop_vinfo);
4060 bbs = LOOP_VINFO_BBS (loop_vinfo);
4061 nbbs = loop->num_nodes;
4063 /* Scan through the loop stmts, applying the pattern recognition
4064 functions starting at each stmt visited: */
4065 for (i = 0; i < nbbs; i++)
4066 {
4067 basic_block bb = bbs[i];
4068 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4069 {
4070 /* Scan over all generic vect_recog_xxx_pattern functions. */
4071 for (j = 0; j < NUM_PATTERNS; j++)
4072 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4073 &stmts_to_replace))
4074 break;
4075 }
4076 }
4077 }
4078 else
4079 {
4080 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4081 for (si = bb_vinfo->region_begin;
4082 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4083 {
4084 if ((stmt = gsi_stmt (si))
4085 && vinfo_for_stmt (stmt)
4086 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4087 continue;
4089 /* Scan over all generic vect_recog_xxx_pattern functions. */
4090 for (j = 0; j < NUM_PATTERNS; j++)
4091 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4092 &stmts_to_replace))
4093 break;
4094 }
4095 }
4096 }
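/* Usage note (an editor's addition, not from the original sources):
   the effect of this pass can be observed by compiling with
   -O3 -fdump-tree-vect-details; each recognized idiom appears in the
   dump as a "<name> pattern recognized:" line printed by
   vect_pattern_recog_1 above.  */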