gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Report that we've found an instance of pattern PATTERN in
51 statement STMT. */
53 static void
54 vect_pattern_detected (const char *name, gimple *stmt)
56 if (dump_enabled_p ())
58 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
59 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
63 static inline void
64 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
66 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
67 stmt);
70 static inline void
71 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
73 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
74 append_pattern_def_seq (stmt_info, stmt);
77 /* Return true if the target supports a vector version of CODE,
78 where CODE is known to map to a direct optab. ITYPE specifies
79 the type of (some of) the scalar inputs and OTYPE specifies the
80 type of the scalar result.
 82 If CODE allows the inputs and outputs to have different types
83 (such as for WIDEN_SUM_EXPR), it is the input mode rather
84 than the output mode that determines the appropriate target pattern.
85 Operand 0 of the target pattern then specifies the mode that the output
86 must have.
88 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
89 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
90 is nonnull. */
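/* As a hypothetical illustration of the ITYPE/OTYPE distinction above:
   for WIDEN_SUM_EXPR with ITYPE == short and OTYPE == int, the optab is
   looked up on the mode of the vector-of-short input type, and operand 0
   of the matched insn must have the mode of the vector-of-int type that
   *VECOTYPE_OUT is then set to.  */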
92 static bool
93 vect_supportable_direct_optab_p (tree otype, tree_code code,
94 tree itype, tree *vecotype_out,
95 tree *vecitype_out = NULL)
97 tree vecitype = get_vectype_for_scalar_type (itype);
98 if (!vecitype)
99 return false;
101 tree vecotype = get_vectype_for_scalar_type (otype);
102 if (!vecotype)
103 return false;
105 optab optab = optab_for_tree_code (code, vecitype, optab_default);
106 if (!optab)
107 return false;
109 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
110 if (icode == CODE_FOR_nothing
111 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
112 return false;
114 *vecotype_out = vecotype;
115 if (vecitype_out)
116 *vecitype_out = vecitype;
117 return true;
120 /* Check whether STMT2 is in the same loop or basic block as STMT1.
121 Which of the two applies depends on whether we're currently doing
122 loop-based or basic-block-based vectorization, as determined by
123 the vinfo_for_stmt for STMT1 (which must be defined).
125 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
126 to be defined as well. */
128 static bool
129 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
131 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
132 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
135 /* If the LHS of DEF_STMT has a single use, and that statement is
136 in the same loop or basic block, return it. */
138 static gimple *
139 vect_single_imm_use (gimple *def_stmt)
141 tree lhs = gimple_assign_lhs (def_stmt);
142 use_operand_p use_p;
143 gimple *use_stmt;
145 if (!single_imm_use (lhs, &use_p, &use_stmt))
146 return NULL;
148 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
149 return NULL;
151 return use_stmt;
154 /* If OP is defined by a statement that's being considered for vectorization,
155 return information about that statement, otherwise return NULL. */
157 static stmt_vec_info
158 vect_get_internal_def (vec_info *vinfo, tree op)
160 vect_def_type dt;
161 gimple *def_stmt;
162 if (TREE_CODE (op) != SSA_NAME
163 || !vect_is_simple_use (op, vinfo, &def_stmt, &dt)
164 || dt != vect_internal_def)
165 return NULL;
167 return vinfo_for_stmt (def_stmt);
170 /* Check whether NAME, an ssa-name used in USE_STMT,
171 is a result of a type promotion, such that:
172 DEF_STMT: NAME = NOP (name0)
173 If CHECK_SIGN is TRUE, check that either both types are signed or both are
174 unsigned. */
176 static bool
177 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
178 tree *orig_type, gimple **def_stmt, bool *promotion)
180 gimple *dummy_gimple;
181 stmt_vec_info stmt_vinfo;
182 tree type = TREE_TYPE (name);
183 tree oprnd0;
184 enum vect_def_type dt;
186 stmt_vinfo = vinfo_for_stmt (use_stmt);
187 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
188 return false;
190 if (dt != vect_internal_def
191 && dt != vect_external_def && dt != vect_constant_def)
192 return false;
194 if (!*def_stmt)
195 return false;
197 if (dt == vect_internal_def)
199 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
200 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
201 return false;
204 if (!is_gimple_assign (*def_stmt))
205 return false;
207 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
208 return false;
210 oprnd0 = gimple_assign_rhs1 (*def_stmt);
212 *orig_type = TREE_TYPE (oprnd0);
213 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
214 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
215 return false;
217 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
218 *promotion = true;
219 else
220 *promotion = false;
222 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
223 return false;
225 return true;
228 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
229 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
231 static tree
232 vect_recog_temp_ssa_var (tree type, gimple *stmt)
234 return make_temp_ssa_name (type, stmt, "patt");
237 /* Return true if STMT_VINFO describes a reduction for which reassociation
238 is allowed. If STMT_INFO is part of a group, assume that it's part of
239 a reduction chain and optimistically assume that all statements
240 except the last allow reassociation. */
242 static bool
243 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
245 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
246 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
247 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
250 /* Function vect_recog_dot_prod_pattern
252 Try to find the following pattern:
254 type x_t, y_t;
255 TYPE1 prod;
256 TYPE2 sum = init;
257 loop:
258 sum_0 = phi <init, sum_1>
259 S1 x_t = ...
260 S2 y_t = ...
261 S3 x_T = (TYPE1) x_t;
262 S4 y_T = (TYPE1) y_t;
263 S5 prod = x_T * y_T;
264 [S6 prod = (TYPE2) prod; #optional]
265 S7 sum_1 = prod + sum_0;
267 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
 268 same size as 'TYPE1' or bigger. This is a special case of a reduction
269 computation.
271 Input:
273 * STMTS: Contains a stmt from which the pattern search begins. In the
274 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
275 will be detected.
277 Output:
279 * TYPE_OUT: The type of the output of this pattern.
281 * Return value: A new stmt that will be used to replace the sequence of
282 stmts that constitute the pattern. In this case it will be:
283 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
285 Note: The dot-prod idiom is a widening reduction pattern that is
286 vectorized without preserving all the intermediate results. It
287 produces only N/2 (widened) results (by summing up pairs of
288 intermediate results) rather than all N results. Therefore, we
289 cannot allow this pattern when we want to get all the results and in
290 the correct order (as is the case when this computation is in an
 291 inner-loop nested in an outer-loop that is being vectorized). */
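/* A scalar source fragment of the kind this recognizer targets (an
   illustrative sketch; the exact types and trip count are assumptions):

     short x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += (int) x[i] * (int) y[i];

   Here 'type' is short and TYPE1/TYPE2 are int; the multiply-and-accumulate
   in the loop body corresponds to S3-S5 and S7 above, so the reduction can
   be carried out as one DOT_PROD_EXPR per vector iteration when the target
   provides a dot-product optab for this type combination.  */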
293 static gimple *
294 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_out)
296 gimple *stmt, *last_stmt = (*stmts)[0];
297 tree oprnd0, oprnd1;
298 tree oprnd00, oprnd01;
299 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
300 vec_info *vinfo = stmt_vinfo->vinfo;
301 tree type, half_type;
302 gimple *pattern_stmt;
303 tree prod_type;
304 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
305 struct loop *loop;
306 tree var;
307 bool promotion;
309 if (!loop_info)
310 return NULL;
312 loop = LOOP_VINFO_LOOP (loop_info);
314 /* We don't allow changing the order of the computation in the inner-loop
315 when doing outer-loop vectorization. */
316 if (loop && nested_in_vect_loop_p (loop, last_stmt))
317 return NULL;
319 if (!is_gimple_assign (last_stmt))
320 return NULL;
322 type = gimple_expr_type (last_stmt);
324 /* Look for the following pattern
325 DX = (TYPE1) X;
326 DY = (TYPE1) Y;
327 DPROD = DX * DY;
328 DDPROD = (TYPE2) DPROD;
329 sum_1 = DDPROD + sum_0;
330 In which
331 - DX is double the size of X
332 - DY is double the size of Y
333 - DX, DY, DPROD all have the same type
334 - sum is the same size of DPROD or bigger
335 - sum has been recognized as a reduction variable.
337 This is equivalent to:
338 DPROD = X w* Y; #widen mult
339 sum_1 = DPROD w+ sum_0; #widen summation
341 DPROD = X w* Y; #widen mult
342 sum_1 = DPROD + sum_0; #summation
345 /* Starting from LAST_STMT, follow the defs of its uses in search
346 of the above pattern. */
348 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
349 return NULL;
351 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
352 return NULL;
354 if (!vect_reassociating_reduction_p (stmt_vinfo))
355 return NULL;
357 oprnd0 = gimple_assign_rhs1 (last_stmt);
358 oprnd1 = gimple_assign_rhs2 (last_stmt);
359 stmt = last_stmt;
361 gimple *def_stmt;
362 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
363 &promotion)
364 && promotion)
366 stmt = def_stmt;
367 oprnd0 = gimple_assign_rhs1 (stmt);
369 else
370 half_type = type;
372 /* So far so good. Since last_stmt was detected as a (summation) reduction,
373 we know that oprnd1 is the reduction variable (defined by a loop-header
374 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
375 Left to check that oprnd0 is defined by a (widen_)mult_expr */
376 if (TREE_CODE (oprnd0) != SSA_NAME)
377 return NULL;
379 prod_type = half_type;
380 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
381 if (!mult_vinfo)
382 return NULL;
 384 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
385 inside the loop (in case we are analyzing an outer-loop). */
386 gassign *mult = dyn_cast <gassign *> (mult_vinfo->stmt);
387 if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
388 return NULL;
389 if (STMT_VINFO_IN_PATTERN_P (mult_vinfo))
391 /* Has been detected as a widening multiplication? */
393 mult = dyn_cast <gassign *> (STMT_VINFO_RELATED_STMT (mult_vinfo));
394 if (!mult || gimple_assign_rhs_code (mult) != WIDEN_MULT_EXPR)
395 return NULL;
396 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)
397 = STMT_VINFO_PATTERN_DEF_SEQ (mult_vinfo);
398 mult_vinfo = vinfo_for_stmt (mult);
399 gcc_assert (mult_vinfo);
400 gcc_assert (STMT_VINFO_DEF_TYPE (mult_vinfo) == vect_internal_def);
401 oprnd00 = gimple_assign_rhs1 (mult);
402 oprnd01 = gimple_assign_rhs2 (mult);
404 else
406 tree half_type0, half_type1;
407 gimple *def_stmt;
408 tree oprnd0, oprnd1;
410 oprnd0 = gimple_assign_rhs1 (mult);
411 oprnd1 = gimple_assign_rhs2 (mult);
412 if (!type_conversion_p (oprnd0, mult, true, &half_type0, &def_stmt,
413 &promotion)
414 || !promotion)
415 return NULL;
416 oprnd00 = gimple_assign_rhs1 (def_stmt);
417 if (!type_conversion_p (oprnd1, mult, true, &half_type1, &def_stmt,
418 &promotion)
419 || !promotion)
420 return NULL;
421 oprnd01 = gimple_assign_rhs1 (def_stmt);
422 if (!types_compatible_p (half_type0, half_type1))
423 return NULL;
424 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
425 return NULL;
428 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
430 half_type = TREE_TYPE (oprnd00);
431 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
432 type_out))
433 return NULL;
435 var = vect_recog_temp_ssa_var (type, NULL);
436 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
437 oprnd00, oprnd01, oprnd1);
439 return pattern_stmt;
443 /* Function vect_recog_sad_pattern
445 Try to find the following Sum of Absolute Difference (SAD) pattern:
447 type x_t, y_t;
448 signed TYPE1 diff, abs_diff;
449 TYPE2 sum = init;
450 loop:
451 sum_0 = phi <init, sum_1>
452 S1 x_t = ...
453 S2 y_t = ...
454 S3 x_T = (TYPE1) x_t;
455 S4 y_T = (TYPE1) y_t;
456 S5 diff = x_T - y_T;
457 S6 abs_diff = ABS_EXPR <diff>;
458 [S7 abs_diff = (TYPE2) abs_diff; #optional]
459 S8 sum_1 = abs_diff + sum_0;
461 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
 462 same size as 'TYPE1' or bigger. This is a special case of a reduction
463 computation.
465 Input:
467 * STMTS: Contains a stmt from which the pattern search begins. In the
468 example, when this function is called with S8, the pattern
469 {S3,S4,S5,S6,S7,S8} will be detected.
471 Output:
473 * TYPE_OUT: The type of the output of this pattern.
475 * Return value: A new stmt that will be used to replace the sequence of
476 stmts that constitute the pattern. In this case it will be:
477 SAD_EXPR <x_t, y_t, sum_0>
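/* An illustrative scalar source for this pattern (types assumed, not taken
   from the original comment):

     unsigned char x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += __builtin_abs ((int) x[i] - (int) y[i]);

   which matches S3-S6 and S8 above and can be done with one SAD_EXPR per
   vector iteration on targets that provide a sum-of-absolute-differences
   optab.  */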
480 static gimple *
481 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_out)
483 gimple *last_stmt = (*stmts)[0];
484 tree sad_oprnd0, sad_oprnd1;
485 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
486 vec_info *vinfo = stmt_vinfo->vinfo;
487 tree half_type;
488 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
489 struct loop *loop;
490 bool promotion;
492 if (!loop_info)
493 return NULL;
495 loop = LOOP_VINFO_LOOP (loop_info);
497 /* We don't allow changing the order of the computation in the inner-loop
498 when doing outer-loop vectorization. */
499 if (loop && nested_in_vect_loop_p (loop, last_stmt))
500 return NULL;
502 if (!is_gimple_assign (last_stmt))
503 return NULL;
505 tree sum_type = gimple_expr_type (last_stmt);
507 /* Look for the following pattern
508 DX = (TYPE1) X;
509 DY = (TYPE1) Y;
510 DDIFF = DX - DY;
511 DAD = ABS_EXPR <DDIFF>;
512 DDPROD = (TYPE2) DPROD;
513 sum_1 = DAD + sum_0;
514 In which
515 - DX is at least double the size of X
516 - DY is at least double the size of Y
517 - DX, DY, DDIFF, DAD all have the same type
518 - sum is the same size of DAD or bigger
519 - sum has been recognized as a reduction variable.
521 This is equivalent to:
522 DDIFF = X w- Y; #widen sub
523 DAD = ABS_EXPR <DDIFF>;
524 sum_1 = DAD w+ sum_0; #widen summation
526 DDIFF = X w- Y; #widen sub
527 DAD = ABS_EXPR <DDIFF>;
528 sum_1 = DAD + sum_0; #summation
531 /* Starting from LAST_STMT, follow the defs of its uses in search
532 of the above pattern. */
534 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
535 return NULL;
537 tree plus_oprnd0, plus_oprnd1;
539 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
540 return NULL;
542 if (!vect_reassociating_reduction_p (stmt_vinfo))
543 return NULL;
545 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
546 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
548 /* The type conversion could be promotion, demotion,
549 or just signed -> unsigned. */
550 gimple *def_stmt;
551 if (type_conversion_p (plus_oprnd0, last_stmt, false,
552 &half_type, &def_stmt, &promotion))
553 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
554 else
555 half_type = sum_type;
557 /* So far so good. Since last_stmt was detected as a (summation) reduction,
558 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
559 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
560 Then check that plus_oprnd0 is defined by an abs_expr. */
562 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
563 return NULL;
565 tree abs_type = half_type;
566 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
567 if (!abs_stmt_vinfo)
568 return NULL;
 570 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
571 inside the loop (in case we are analyzing an outer-loop). */
572 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
573 if (!abs_stmt
574 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
575 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
576 return NULL;
578 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
579 if (TYPE_UNSIGNED (abs_type))
580 return NULL;
582 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
584 if (TREE_CODE (abs_oprnd) != SSA_NAME)
585 return NULL;
587 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
588 if (!diff_stmt_vinfo)
589 return NULL;
 591 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
592 inside the loop (in case we are analyzing an outer-loop). */
593 gassign *diff_stmt = dyn_cast <gassign *> (diff_stmt_vinfo->stmt);
594 if (!diff_stmt || gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
595 return NULL;
597 tree half_type0, half_type1;
599 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
600 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
602 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
603 &half_type0, &def_stmt, &promotion)
604 || !promotion)
605 return NULL;
606 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
608 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
609 &half_type1, &def_stmt, &promotion)
610 || !promotion)
611 return NULL;
612 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
614 if (!types_compatible_p (half_type0, half_type1))
615 return NULL;
616 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
617 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
618 return NULL;
620 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
622 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR,
623 TREE_TYPE (sad_oprnd0), type_out))
624 return NULL;
626 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
627 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
628 sad_oprnd1, plus_oprnd1);
630 return pattern_stmt;
634 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
635 and LSHIFT_EXPR.
637 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
 638 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
640 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
641 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
 642 that satisfies the above restrictions, we can perform a widening operation
643 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
644 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
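/* For instance (an illustrative case, not part of the original comment):
   with HALF_TYPE == char, TYPE == int and CONST_OPRND == 400, the constant
   does not fit char, but TYPE is four times wider than HALF_TYPE and 400
   does fit the 16-bit intermediate type, so *WSTMT is set to
   a_it = (interm_type) a_t and the caller can build the widening operation
   on the intermediate type instead.  */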
646 static bool
647 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
648 tree const_oprnd, tree *oprnd,
649 gimple **wstmt, tree type,
650 tree *half_type, gimple *def_stmt)
652 tree new_type, new_oprnd;
654 if (code != MULT_EXPR && code != LSHIFT_EXPR)
655 return false;
657 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
658 || (code == LSHIFT_EXPR
659 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
660 != 1))
661 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
663 /* CONST_OPRND is a constant of HALF_TYPE. */
664 *oprnd = gimple_assign_rhs1 (def_stmt);
665 return true;
668 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
669 return false;
671 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
672 return false;
674 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
675 a type 2 times bigger than HALF_TYPE. */
676 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
677 TYPE_UNSIGNED (type));
678 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
679 || (code == LSHIFT_EXPR
680 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
681 return false;
683 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
684 *oprnd = gimple_assign_rhs1 (def_stmt);
685 new_oprnd = make_ssa_name (new_type);
686 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
687 *oprnd = new_oprnd;
689 *half_type = new_type;
690 return true;
694 /* Function vect_recog_widen_mult_pattern
696 Try to find the following pattern:
698 type1 a_t;
699 type2 b_t;
700 TYPE a_T, b_T, prod_T;
702 S1 a_t = ;
703 S2 b_t = ;
704 S3 a_T = (TYPE) a_t;
705 S4 b_T = (TYPE) b_t;
706 S5 prod_T = a_T * b_T;
708 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
710 Also detect unsigned cases:
712 unsigned type1 a_t;
713 unsigned type2 b_t;
714 unsigned TYPE u_prod_T;
715 TYPE a_T, b_T, prod_T;
717 S1 a_t = ;
718 S2 b_t = ;
719 S3 a_T = (TYPE) a_t;
720 S4 b_T = (TYPE) b_t;
721 S5 prod_T = a_T * b_T;
722 S6 u_prod_T = (unsigned TYPE) prod_T;
724 and multiplication by constants:
726 type a_t;
727 TYPE a_T, prod_T;
729 S1 a_t = ;
730 S3 a_T = (TYPE) a_t;
731 S5 prod_T = a_T * CONST;
733 A special case of multiplication by constants is when 'TYPE' is 4 times
734 bigger than 'type', but CONST fits an intermediate type 2 times smaller
735 than 'TYPE'. In that case we create an additional pattern stmt for S3
736 to create a variable of the intermediate type, and perform widen-mult
737 on the intermediate type as well:
739 type a_t;
740 interm_type a_it;
741 TYPE a_T, prod_T, prod_T';
743 S1 a_t = ;
744 S3 a_T = (TYPE) a_t;
745 '--> a_it = (interm_type) a_t;
746 S5 prod_T = a_T * CONST;
747 '--> prod_T' = a_it w* CONST;
749 Input/Output:
751 * STMTS: Contains a stmt from which the pattern search begins. In the
752 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
753 is detected. In case of unsigned widen-mult, the original stmt (S5) is
754 replaced with S6 in STMTS. In case of multiplication by a constant
755 of an intermediate type (the last case above), STMTS also contains S3
756 (inserted before S5).
758 Output:
760 * TYPE_OUT: The type of the output of this pattern.
762 * Return value: A new stmt that will be used to replace the sequence of
763 stmts that constitute the pattern. In this case it will be:
764 WIDEN_MULT <a_t, b_t>
765 If the result of WIDEN_MULT needs to be converted to a larger type, the
766 returned stmt will be this type conversion stmt.
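/* An illustrative scalar source for the basic form of this pattern (the
   array types are an assumption):

     short a[256], b[256];
     int prod[256];
     for (int i = 0; i < 256; i++)
       prod[i] = (int) a[i] * (int) b[i];

   corresponding to S3-S5 above; the multiplication can then be performed as
   a widening multiply from short to int.  */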
769 static gimple *
770 vect_recog_widen_mult_pattern (vec<gimple *> *stmts, tree *type_out)
772 gimple *last_stmt = stmts->pop ();
773 gimple *def_stmt0, *def_stmt1;
774 tree oprnd0, oprnd1;
775 tree type, half_type0, half_type1;
776 gimple *new_stmt = NULL, *pattern_stmt = NULL;
777 tree vectype, vecitype;
778 tree var;
779 enum tree_code dummy_code;
780 int dummy_int;
781 vec<tree> dummy_vec;
782 bool op1_ok;
783 bool promotion;
785 if (!is_gimple_assign (last_stmt))
786 return NULL;
788 type = gimple_expr_type (last_stmt);
790 /* Starting from LAST_STMT, follow the defs of its uses in search
791 of the above pattern. */
793 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
794 return NULL;
796 oprnd0 = gimple_assign_rhs1 (last_stmt);
797 oprnd1 = gimple_assign_rhs2 (last_stmt);
799 /* Check argument 0. */
800 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
801 &promotion)
802 || !promotion)
803 return NULL;
804 /* Check argument 1. */
805 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
806 &def_stmt1, &promotion);
808 if (op1_ok && promotion)
810 oprnd0 = gimple_assign_rhs1 (def_stmt0);
811 oprnd1 = gimple_assign_rhs1 (def_stmt1);
813 else
815 if (TREE_CODE (oprnd1) == INTEGER_CST
816 && TREE_CODE (half_type0) == INTEGER_TYPE
817 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
818 &oprnd0, &new_stmt, type,
819 &half_type0, def_stmt0))
821 half_type1 = half_type0;
822 oprnd1 = fold_convert (half_type1, oprnd1);
824 else
825 return NULL;
828 /* If the two arguments have different sizes, convert the one with
829 the smaller type into the larger type. */
830 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
832 /* If we already used up the single-stmt slot give up. */
833 if (new_stmt)
834 return NULL;
836 tree* oprnd = NULL;
837 gimple *def_stmt = NULL;
839 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
841 def_stmt = def_stmt0;
842 half_type0 = half_type1;
843 oprnd = &oprnd0;
845 else
847 def_stmt = def_stmt1;
848 half_type1 = half_type0;
849 oprnd = &oprnd1;
852 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
853 tree new_oprnd = make_ssa_name (half_type0);
854 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
855 *oprnd = new_oprnd;
858 /* Handle unsigned case. Look for
859 S6 u_prod_T = (unsigned TYPE) prod_T;
860 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
861 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
863 gimple *use_stmt;
864 tree use_lhs;
865 tree use_type;
867 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
868 return NULL;
870 use_stmt = vect_single_imm_use (last_stmt);
871 if (!use_stmt || !is_gimple_assign (use_stmt)
872 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
873 return NULL;
875 use_lhs = gimple_assign_lhs (use_stmt);
876 use_type = TREE_TYPE (use_lhs);
877 if (!INTEGRAL_TYPE_P (use_type)
878 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
879 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
880 return NULL;
882 type = use_type;
883 last_stmt = use_stmt;
886 if (!types_compatible_p (half_type0, half_type1))
887 return NULL;
889 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
890 to get an intermediate result of type ITYPE. In this case we need
891 to build a statement to convert this intermediate result to type TYPE. */
892 tree itype = type;
893 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
894 itype = build_nonstandard_integer_type
895 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
896 TYPE_UNSIGNED (type));
898 /* Pattern detected. */
899 vect_pattern_detected ("vect_recog_widen_mult_pattern", last_stmt);
901 /* Check target support */
902 vectype = get_vectype_for_scalar_type (half_type0);
903 vecitype = get_vectype_for_scalar_type (itype);
904 if (!vectype
905 || !vecitype
906 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
907 vecitype, vectype,
908 &dummy_code, &dummy_code,
909 &dummy_int, &dummy_vec))
910 return NULL;
912 *type_out = get_vectype_for_scalar_type (type);
913 if (!*type_out)
914 return NULL;
916 /* Pattern supported. Create a stmt to be used to replace the pattern: */
917 var = vect_recog_temp_ssa_var (itype, NULL);
918 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
920 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
921 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
923 /* If the original two operands have different sizes, we may need to convert
 924 the smaller one into the larger type. If this is the case, at this point
925 the new stmt is already built. */
926 if (new_stmt)
928 append_pattern_def_seq (stmt_vinfo, new_stmt);
929 stmt_vec_info new_stmt_info
930 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
931 set_vinfo_for_stmt (new_stmt, new_stmt_info);
932 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
 935 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
936 the result of the widen-mult operation into type TYPE. */
937 if (itype != type)
939 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
940 stmt_vec_info pattern_stmt_info
941 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
942 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
943 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
944 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
945 NOP_EXPR,
946 gimple_assign_lhs (pattern_stmt));
949 stmts->safe_push (last_stmt);
950 return pattern_stmt;
954 /* Function vect_recog_pow_pattern
956 Try to find the following pattern:
958 x = POW (y, N);
960 with POW being one of pow, powf, powi, powif and N being
961 either 2 or 0.5.
963 Input:
965 * LAST_STMT: A stmt from which the pattern search begins.
967 Output:
969 * TYPE_OUT: The type of the output of this pattern.
971 * Return value: A new stmt that will be used to replace the sequence of
972 stmts that constitute the pattern. In this case it will be:
973 x = x * x
975 x = sqrt (x)
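/* Illustrative examples (assumed types): with double y, a call
   x = pow (y, 2.0) is rewritten as x = y * y when the multiplication is
   supported on the vector type, and x = pow (y, 0.5) becomes a square root
   computed through the internal function IFN_SQRT when the target supports
   it.  */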
978 static gimple *
979 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_out)
981 gimple *last_stmt = (*stmts)[0];
982 tree base, exp;
983 gimple *stmt;
984 tree var;
986 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
987 return NULL;
989 switch (gimple_call_combined_fn (last_stmt))
991 CASE_CFN_POW:
992 CASE_CFN_POWI:
993 break;
995 default:
996 return NULL;
999 base = gimple_call_arg (last_stmt, 0);
1000 exp = gimple_call_arg (last_stmt, 1);
1001 if (TREE_CODE (exp) != REAL_CST
1002 && TREE_CODE (exp) != INTEGER_CST)
1004 if (flag_unsafe_math_optimizations
1005 && TREE_CODE (base) == REAL_CST
1006 && !gimple_call_internal_p (last_stmt))
1008 combined_fn log_cfn;
1009 built_in_function exp_bfn;
1010 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1012 case BUILT_IN_POW:
1013 log_cfn = CFN_BUILT_IN_LOG;
1014 exp_bfn = BUILT_IN_EXP;
1015 break;
1016 case BUILT_IN_POWF:
1017 log_cfn = CFN_BUILT_IN_LOGF;
1018 exp_bfn = BUILT_IN_EXPF;
1019 break;
1020 case BUILT_IN_POWL:
1021 log_cfn = CFN_BUILT_IN_LOGL;
1022 exp_bfn = BUILT_IN_EXPL;
1023 break;
1024 default:
1025 return NULL;
1027 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1028 tree exp_decl = builtin_decl_implicit (exp_bfn);
1029 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1030 does that, but if C is a power of 2, we want to use
1031 exp2 (log2 (C) * x) in the non-vectorized version, but for
1032 vectorization we don't have vectorized exp2. */
1033 if (logc
1034 && TREE_CODE (logc) == REAL_CST
1035 && exp_decl
1036 && lookup_attribute ("omp declare simd",
1037 DECL_ATTRIBUTES (exp_decl)))
1039 cgraph_node *node = cgraph_node::get_create (exp_decl);
1040 if (node->simd_clones == NULL)
1042 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1043 || node->definition)
1044 return NULL;
1045 expand_simd_clones (node);
1046 if (node->simd_clones == NULL)
1047 return NULL;
1049 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1050 if (!*type_out)
1051 return NULL;
1052 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1053 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1054 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1055 new_pattern_def_seq (stmt_vinfo, g);
1056 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1057 g = gimple_build_call (exp_decl, 1, def);
1058 gimple_call_set_lhs (g, res);
1059 return g;
1063 return NULL;
1066 /* We now have a pow or powi builtin function call with a constant
1067 exponent. */
1069 /* Catch squaring. */
1070 if ((tree_fits_shwi_p (exp)
1071 && tree_to_shwi (exp) == 2)
1072 || (TREE_CODE (exp) == REAL_CST
1073 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1075 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1076 TREE_TYPE (base), type_out))
1077 return NULL;
1079 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1080 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1081 return stmt;
1084 /* Catch square root. */
1085 if (TREE_CODE (exp) == REAL_CST
1086 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1088 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1089 if (*type_out
1090 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1091 OPTIMIZE_FOR_SPEED))
1093 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1094 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1095 gimple_call_set_lhs (stmt, var);
1096 gimple_call_set_nothrow (stmt, true);
1097 return stmt;
1101 return NULL;
1105 /* Function vect_recog_widen_sum_pattern
1107 Try to find the following pattern:
1109 type x_t;
1110 TYPE x_T, sum = init;
1111 loop:
1112 sum_0 = phi <init, sum_1>
1113 S1 x_t = *p;
1114 S2 x_T = (TYPE) x_t;
1115 S3 sum_1 = x_T + sum_0;
1117 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1118 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1119 a special case of a reduction computation.
1121 Input:
1123 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1124 when this function is called with S3, the pattern {S2,S3} will be detected.
1126 Output:
1128 * TYPE_OUT: The type of the output of this pattern.
1130 * Return value: A new stmt that will be used to replace the sequence of
1131 stmts that constitute the pattern. In this case it will be:
1132 WIDEN_SUM <x_t, sum_0>
1134 Note: The widening-sum idiom is a widening reduction pattern that is
1135 vectorized without preserving all the intermediate results. It
1136 produces only N/2 (widened) results (by summing up pairs of
1137 intermediate results) rather than all N results. Therefore, we
1138 cannot allow this pattern when we want to get all the results and in
1139 the correct order (as is the case when this computation is in an
 1140 inner-loop nested in an outer-loop that is being vectorized). */
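/* An illustrative scalar source for this pattern (types assumed):

     short x[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += (int) x[i];

   i.e. S2 and S3 above, which can be carried out with WIDEN_SUM_EXPR when
   the target provides a widening-sum optab for vectors of short.  */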
1142 static gimple *
1143 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_out)
1145 gimple *stmt, *last_stmt = (*stmts)[0];
1146 tree oprnd0, oprnd1;
1147 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1148 tree type, half_type;
1149 gimple *pattern_stmt;
1150 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1151 struct loop *loop;
1152 tree var;
1153 bool promotion;
1155 if (!loop_info)
1156 return NULL;
1158 loop = LOOP_VINFO_LOOP (loop_info);
1160 /* We don't allow changing the order of the computation in the inner-loop
1161 when doing outer-loop vectorization. */
1162 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1163 return NULL;
1165 if (!is_gimple_assign (last_stmt))
1166 return NULL;
1168 type = gimple_expr_type (last_stmt);
1170 /* Look for the following pattern
1171 DX = (TYPE) X;
1172 sum_1 = DX + sum_0;
1173 In which DX is at least double the size of X, and sum_1 has been
1174 recognized as a reduction variable.
1177 /* Starting from LAST_STMT, follow the defs of its uses in search
1178 of the above pattern. */
1180 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1181 return NULL;
1183 if (!vect_reassociating_reduction_p (stmt_vinfo))
1184 return NULL;
1186 oprnd0 = gimple_assign_rhs1 (last_stmt);
1187 oprnd1 = gimple_assign_rhs2 (last_stmt);
1189 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1190 we know that oprnd1 is the reduction variable (defined by a loop-header
1191 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1192 Left to check that oprnd0 is defined by a cast from type 'type' to type
1193 'TYPE'. */
1195 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1196 &promotion)
1197 || !promotion)
1198 return NULL;
1200 oprnd0 = gimple_assign_rhs1 (stmt);
1202 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1204 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, half_type,
1205 type_out))
1206 return NULL;
1208 var = vect_recog_temp_ssa_var (type, NULL);
1209 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1211 return pattern_stmt;
1215 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1217 Input:
1218 STMT - a statement to check.
1219 DEF - we support operations with two operands, one of which is constant.
1220 The other operand can be defined by a demotion operation, or by a
1221 previous statement in a sequence of over-promoted operations. In the
 1222 latter case DEF is used to replace that operand. (It is defined by a
1223 pattern statement we created for the previous statement in the
1224 sequence).
1226 Input/output:
1227 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1228 NULL, it's the type of DEF.
1229 STMTS - additional pattern statements. If a pattern statement (type
1230 conversion) is created in this function, its original statement is
1231 added to STMTS.
1233 Output:
1234 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1235 operands to use in the new pattern statement for STMT (will be created
1236 in vect_recog_over_widening_pattern ()).
1237 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1238 statements for STMT: the first one is a type promotion and the second
1239 one is the operation itself. We return the type promotion statement
1240 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1241 the second pattern statement. */
1243 static bool
1244 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1245 tree *op0, tree *op1, gimple **new_def_stmt,
1246 vec<gimple *> *stmts)
1248 enum tree_code code;
1249 tree const_oprnd, oprnd;
1250 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1251 gimple *def_stmt, *new_stmt;
1252 bool first = false;
1253 bool promotion;
1255 *op0 = NULL_TREE;
1256 *op1 = NULL_TREE;
1257 *new_def_stmt = NULL;
1259 if (!is_gimple_assign (stmt))
1260 return false;
1262 code = gimple_assign_rhs_code (stmt);
1263 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1264 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1265 return false;
1267 oprnd = gimple_assign_rhs1 (stmt);
1268 const_oprnd = gimple_assign_rhs2 (stmt);
1269 type = gimple_expr_type (stmt);
1271 if (TREE_CODE (oprnd) != SSA_NAME
1272 || TREE_CODE (const_oprnd) != INTEGER_CST)
1273 return false;
1275 /* If oprnd has other uses besides that in stmt we cannot mark it
1276 as being part of a pattern only. */
1277 if (!has_single_use (oprnd))
1278 return false;
1280 /* If we are in the middle of a sequence, we use DEF from a previous
1281 statement. Otherwise, OPRND has to be a result of type promotion. */
1282 if (*new_type)
1284 half_type = *new_type;
1285 oprnd = def;
1287 else
1289 first = true;
1290 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1291 &promotion)
1292 || !promotion
1293 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1294 return false;
1297 /* Can we perform the operation on a smaller type? */
1298 switch (code)
1300 case BIT_IOR_EXPR:
1301 case BIT_XOR_EXPR:
1302 case BIT_AND_EXPR:
1303 if (!int_fits_type_p (const_oprnd, half_type))
1305 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1306 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1307 return false;
1309 interm_type = build_nonstandard_integer_type (
1310 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1311 if (!int_fits_type_p (const_oprnd, interm_type))
1312 return false;
1315 break;
1317 case LSHIFT_EXPR:
1318 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1319 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1320 return false;
1322 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1323 (e.g., if the original value was char, the shift amount is at most 8
1324 if we want to use short). */
1325 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1326 return false;
1328 interm_type = build_nonstandard_integer_type (
1329 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1331 if (!vect_supportable_shift (code, interm_type))
1332 return false;
1334 break;
1336 case RSHIFT_EXPR:
1337 if (vect_supportable_shift (code, half_type))
1338 break;
1340 /* Try intermediate type - HALF_TYPE is not supported. */
1341 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1342 return false;
1344 interm_type = build_nonstandard_integer_type (
1345 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1347 if (!vect_supportable_shift (code, interm_type))
1348 return false;
1350 break;
1352 default:
1353 gcc_unreachable ();
1356 /* There are four possible cases:
1357 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1358 the first statement in the sequence)
1359 a. The original, HALF_TYPE, is not enough - we replace the promotion
1360 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1361 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1362 promotion.
1363 2. OPRND is defined by a pattern statement we created.
1364 a. Its type is not sufficient for the operation, we create a new stmt:
1365 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1366 this statement in NEW_DEF_STMT, and it is later put in
1367 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1368 b. OPRND is good to use in the new statement. */
1369 if (first)
1371 if (interm_type)
1373 /* Replace the original type conversion HALF_TYPE->TYPE with
1374 HALF_TYPE->INTERM_TYPE. */
1375 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1377 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1378 /* Check if the already created pattern stmt is what we need. */
1379 if (!is_gimple_assign (new_stmt)
1380 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1381 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1382 return false;
1384 stmts->safe_push (def_stmt);
1385 oprnd = gimple_assign_lhs (new_stmt);
1387 else
1389 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1390 oprnd = gimple_assign_rhs1 (def_stmt);
1391 new_oprnd = make_ssa_name (interm_type);
1392 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1393 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1394 stmts->safe_push (def_stmt);
1395 oprnd = new_oprnd;
1398 else
1400 /* Retrieve the operand before the type promotion. */
1401 oprnd = gimple_assign_rhs1 (def_stmt);
1404 else
1406 if (interm_type)
1408 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1409 new_oprnd = make_ssa_name (interm_type);
1410 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1411 oprnd = new_oprnd;
1412 *new_def_stmt = new_stmt;
1415 /* Otherwise, OPRND is already set. */
1418 if (interm_type)
1419 *new_type = interm_type;
1420 else
1421 *new_type = half_type;
1423 *op0 = oprnd;
1424 *op1 = fold_convert (*new_type, const_oprnd);
1426 return true;
1430 /* Try to find a statement or a sequence of statements that can be performed
1431 on a smaller type:
1433 type x_t;
1434 TYPE x_T, res0_T, res1_T;
1435 loop:
1436 S1 x_t = *p;
1437 S2 x_T = (TYPE) x_t;
1438 S3 res0_T = op (x_T, C0);
1439 S4 res1_T = op (res0_T, C1);
1440 S5 ... = () res1_T; - type demotion
1442 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1443 constants.
1444 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1445 be 'type' or some intermediate type. For now, we expect S5 to be a type
1446 demotion operation. We also check that S3 and S4 have only one use. */
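/* An illustrative scalar source for this situation (types and constants are
   assumptions):

     unsigned char in[256], out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (in[i] >> 2) & 0x3f;

   Because of C integer promotion the shift and the bitwise AND are done in
   int (S2-S4 above) even though char, or a 16-bit intermediate type, would
   be wide enough; the store back to unsigned char provides the type
   demotion S5.  */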
1448 static gimple *
1449 vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
1451 gimple *stmt = stmts->pop ();
1452 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1453 *use_stmt = NULL;
1454 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1455 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1456 bool first;
1457 tree type = NULL;
1459 first = true;
1460 while (1)
1462 if (!vinfo_for_stmt (stmt)
1463 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1464 return NULL;
1466 new_def_stmt = NULL;
1467 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1468 &op0, &op1, &new_def_stmt,
1469 stmts))
1471 if (first)
1472 return NULL;
1473 else
1474 break;
1477 /* STMT can be performed on a smaller type. Check its uses. */
1478 use_stmt = vect_single_imm_use (stmt);
1479 if (!use_stmt || !is_gimple_assign (use_stmt))
1480 return NULL;
1482 /* Create pattern statement for STMT. */
1483 vectype = get_vectype_for_scalar_type (new_type);
1484 if (!vectype)
1485 return NULL;
1487 /* We want to collect all the statements for which we create pattern
 1488 statements, except for the case when the last statement in the
1489 sequence doesn't have a corresponding pattern statement. In such
1490 case we associate the last pattern statement with the last statement
1491 in the sequence. Therefore, we only add the original statement to
1492 the list if we know that it is not the last. */
1493 if (prev_stmt)
1494 stmts->safe_push (prev_stmt);
1496 var = vect_recog_temp_ssa_var (new_type, NULL);
1497 pattern_stmt
1498 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1499 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1500 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1502 if (dump_enabled_p ())
1504 dump_printf_loc (MSG_NOTE, vect_location,
1505 "created pattern stmt: ");
1506 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1509 type = gimple_expr_type (stmt);
1510 prev_stmt = stmt;
1511 stmt = use_stmt;
1513 first = false;
1516 /* We got a sequence. We expect it to end with a type demotion operation.
1517 Otherwise, we quit (for now). There are three possible cases: the
1518 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1519 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1520 NEW_TYPE differs (we create a new conversion statement). */
1521 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1523 use_lhs = gimple_assign_lhs (use_stmt);
1524 use_type = TREE_TYPE (use_lhs);
 1525 /* Support only type demotion or signedness change. */
1526 if (!INTEGRAL_TYPE_P (use_type)
1527 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1528 return NULL;
1530 /* Check that NEW_TYPE is not bigger than the conversion result. */
1531 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1532 return NULL;
1534 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1535 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1537 *type_out = get_vectype_for_scalar_type (use_type);
1538 if (!*type_out)
1539 return NULL;
1541 /* Create NEW_TYPE->USE_TYPE conversion. */
1542 new_oprnd = make_ssa_name (use_type);
1543 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1544 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1546 /* We created a pattern statement for the last statement in the
1547 sequence, so we don't need to associate it with the pattern
1548 statement created for PREV_STMT. Therefore, we add PREV_STMT
1549 to the list in order to mark it later in vect_pattern_recog_1. */
1550 if (prev_stmt)
1551 stmts->safe_push (prev_stmt);
1553 else
1555 if (prev_stmt)
1556 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1557 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1559 *type_out = vectype;
1562 stmts->safe_push (use_stmt);
1564 else
1565 /* TODO: support general case, create a conversion to the correct type. */
1566 return NULL;
1568 /* Pattern detected. */
1569 vect_pattern_detected ("vect_recog_over_widening_pattern", stmts->last ());
1571 return pattern_stmt;
1574 /* Detect widening shift pattern:
1576 type a_t;
1577 TYPE a_T, res_T;
1579 S1 a_t = ;
1580 S2 a_T = (TYPE) a_t;
1581 S3 res_T = a_T << CONST;
1583 where type 'TYPE' is at least double the size of type 'type'.
1585 Also detect cases where the shift result is immediately converted
1586 to another type 'result_type' that is no larger in size than 'TYPE'.
1587 In those cases we perform a widen-shift that directly results in
1588 'result_type', to avoid a possible over-widening situation:
1590 type a_t;
1591 TYPE a_T, res_T;
1592 result_type res_result;
1594 S1 a_t = ;
1595 S2 a_T = (TYPE) a_t;
1596 S3 res_T = a_T << CONST;
1597 S4 res_result = (result_type) res_T;
1598 '--> res_result' = a_t w<< CONST;
1600 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1601 create an additional pattern stmt for S2 to create a variable of an
1602 intermediate type, and perform widen-shift on the intermediate type:
1604 type a_t;
1605 interm_type a_it;
1606 TYPE a_T, res_T, res_T';
1608 S1 a_t = ;
1609 S2 a_T = (TYPE) a_t;
1610 '--> a_it = (interm_type) a_t;
1611 S3 res_T = a_T << CONST;
1612 '--> res_T' = a_it <<* CONST;
1614 Input/Output:
1616 * STMTS: Contains a stmt from which the pattern search begins.
1617 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1618 in STMTS. When an intermediate type is used and a pattern statement is
1619 created for S2, we also put S2 here (before S3).
1621 Output:
1623 * TYPE_OUT: The type of the output of this pattern.
1625 * Return value: A new stmt that will be used to replace the sequence of
1626 stmts that constitute the pattern. In this case it will be:
1627 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
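/* An illustrative scalar source for this pattern (types assumed):

     unsigned short a[256];
     int res[256];
     for (int i = 0; i < 256; i++)
       res[i] = a[i] << 3;

   The promotion of a[i] to int is S2 above and the shift is S3, so the pair
   can be replaced by a single widening shift when the target supports
   WIDEN_LSHIFT_EXPR for this type combination.  */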
1629 static gimple *
1630 vect_recog_widen_shift_pattern (vec<gimple *> *stmts, tree *type_out)
1632 gimple *last_stmt = stmts->pop ();
1633 gimple *def_stmt0;
1634 tree oprnd0, oprnd1;
1635 tree type, half_type0;
1636 gimple *pattern_stmt;
1637 tree vectype, vectype_out = NULL_TREE;
1638 tree var;
1639 enum tree_code dummy_code;
1640 int dummy_int;
1641 vec<tree> dummy_vec;
1642 gimple *use_stmt;
1643 bool promotion;
1645 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1646 return NULL;
1648 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1649 return NULL;
1651 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1652 return NULL;
1654 oprnd0 = gimple_assign_rhs1 (last_stmt);
1655 oprnd1 = gimple_assign_rhs2 (last_stmt);
1656 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1657 return NULL;
1659 /* Check operand 0: it has to be defined by a type promotion. */
1660 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1661 &promotion)
1662 || !promotion)
1663 return NULL;
1665 /* Check operand 1: has to be positive. We check that it fits the type
1666 in vect_handle_widen_op_by_const (). */
1667 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1668 return NULL;
1670 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1671 type = gimple_expr_type (last_stmt);
1673 /* Check for subsequent conversion to another type. */
1674 use_stmt = vect_single_imm_use (last_stmt);
1675 if (use_stmt && is_gimple_assign (use_stmt)
1676 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1677 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1679 tree use_lhs = gimple_assign_lhs (use_stmt);
1680 tree use_type = TREE_TYPE (use_lhs);
1682 if (INTEGRAL_TYPE_P (use_type)
1683 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1685 last_stmt = use_stmt;
1686 type = use_type;
 1690 /* Check if this is a widening operation. */
1691 gimple *wstmt = NULL;
1692 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1693 &oprnd0, &wstmt,
1694 type, &half_type0, def_stmt0))
1695 return NULL;
1697 /* Pattern detected. */
1698 vect_pattern_detected ("vect_recog_widen_shift_pattern", last_stmt);
1700 /* Check target support. */
1701 vectype = get_vectype_for_scalar_type (half_type0);
1702 vectype_out = get_vectype_for_scalar_type (type);
1704 if (!vectype
1705 || !vectype_out
1706 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1707 vectype_out, vectype,
1708 &dummy_code, &dummy_code,
1709 &dummy_int, &dummy_vec))
1710 return NULL;
1712 *type_out = vectype_out;
1714 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1715 var = vect_recog_temp_ssa_var (type, NULL);
1716 pattern_stmt
1717 = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1718 if (wstmt)
1720 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1721 new_pattern_def_seq (stmt_vinfo, wstmt);
1722 stmt_vec_info new_stmt_info
1723 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1724 set_vinfo_for_stmt (wstmt, new_stmt_info);
1725 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1728 stmts->safe_push (last_stmt);
1729 return pattern_stmt;
 1732 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1734 type a_t, b_t, c_t;
1736 S0 a_t = b_t r<< c_t;
1738 Input/Output:
1740 * STMTS: Contains a stmt from which the pattern search begins,
1741 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1742 with a sequence:
1744 S1 d_t = -c_t;
1745 S2 e_t = d_t & (B - 1);
1746 S3 f_t = b_t << c_t;
1747 S4 g_t = b_t >> e_t;
1748 S0 a_t = f_t | g_t;
1750 where B is element bitsize of type.
1752 Output:
1754 * TYPE_OUT: The type of the output of this pattern.
1756 * Return value: A new stmt that will be used to replace the rotate
1757 S0 stmt. */
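/* As an illustration (assumed types and variable names): a rotate written
   in source as

     unsigned int x = in[i];
     out[i] = (x << r) | (x >> ((-r) & 31));

   is usually folded into a single r<< statement earlier in compilation; when
   the target has vector shifts but no vector rotate, the S1-S4/S0 sequence
   above re-expresses that rotate in a form the vectorizer can handle.  */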
1759 static gimple *
1760 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_out)
1762 gimple *last_stmt = stmts->pop ();
1763 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1764 gimple *pattern_stmt, *def_stmt;
1765 enum tree_code rhs_code;
1766 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1767 vec_info *vinfo = stmt_vinfo->vinfo;
1768 enum vect_def_type dt;
1769 optab optab1, optab2;
1770 edge ext_def = NULL;
1772 if (!is_gimple_assign (last_stmt))
1773 return NULL;
1775 rhs_code = gimple_assign_rhs_code (last_stmt);
1776 switch (rhs_code)
1778 case LROTATE_EXPR:
1779 case RROTATE_EXPR:
1780 break;
1781 default:
1782 return NULL;
1785 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1786 return NULL;
1788 lhs = gimple_assign_lhs (last_stmt);
1789 oprnd0 = gimple_assign_rhs1 (last_stmt);
1790 type = TREE_TYPE (oprnd0);
1791 oprnd1 = gimple_assign_rhs2 (last_stmt);
1792 if (TREE_CODE (oprnd0) != SSA_NAME
1793 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1794 || !INTEGRAL_TYPE_P (type)
1795 || !TYPE_UNSIGNED (type))
1796 return NULL;
1798 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1799 return NULL;
1801 if (dt != vect_internal_def
1802 && dt != vect_constant_def
1803 && dt != vect_external_def)
1804 return NULL;
1806 vectype = get_vectype_for_scalar_type (type);
1807 if (vectype == NULL_TREE)
1808 return NULL;
1810 /* If vector/vector or vector/scalar rotate is supported by the target,
1811 don't do anything here. */
1812 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1813 if (optab1
1814 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1815 return NULL;
1817 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1819 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1820 if (optab2
1821 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1822 return NULL;
1825 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1826 don't do anything here either. */
1827 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1828 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1829 if (!optab1
1830 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1831 || !optab2
1832 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1834 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1835 return NULL;
1836 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1837 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1838 if (!optab1
1839 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1840 || !optab2
1841 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1842 return NULL;
1845 *type_out = vectype;
1847 if (dt == vect_external_def
1848 && TREE_CODE (oprnd1) == SSA_NAME
1849 && is_a <loop_vec_info> (vinfo))
1851 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1852 ext_def = loop_preheader_edge (loop);
1853 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1855 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1856 if (bb == NULL
1857 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1858 ext_def = NULL;
1862 def = NULL_TREE;
1863 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1864 if (TREE_CODE (oprnd1) == INTEGER_CST
1865 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1866 def = oprnd1;
1867 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1869 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1870 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1871 && TYPE_PRECISION (TREE_TYPE (rhs1))
1872 == TYPE_PRECISION (type))
1873 def = rhs1;
1876 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1877 if (def == NULL_TREE)
1879 def = vect_recog_temp_ssa_var (type, NULL);
1880 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1881 if (ext_def)
1883 basic_block new_bb
1884 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1885 gcc_assert (!new_bb);
1887 else
1888 append_pattern_def_seq (stmt_vinfo, def_stmt);
1890 stype = TREE_TYPE (def);
1891 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1893 if (TREE_CODE (def) == INTEGER_CST)
1895 if (!tree_fits_uhwi_p (def)
1896 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1897 || integer_zerop (def))
1898 return NULL;
1899 def2 = build_int_cst (stype,
1900 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1902 else
1904 tree vecstype = get_vectype_for_scalar_type (stype);
1905 stmt_vec_info def_stmt_vinfo;
1907 if (vecstype == NULL_TREE)
1908 return NULL;
1909 def2 = vect_recog_temp_ssa_var (stype, NULL);
1910 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1911 if (ext_def)
1913 basic_block new_bb
1914 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1915 gcc_assert (!new_bb);
1917 else
1919 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1920 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1921 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1922 append_pattern_def_seq (stmt_vinfo, def_stmt);
1925 def2 = vect_recog_temp_ssa_var (stype, NULL);
1926 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
1927 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1928 gimple_assign_lhs (def_stmt), mask);
1929 if (ext_def)
1931 basic_block new_bb
1932 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1933 gcc_assert (!new_bb);
1935 else
1937 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1938 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1939 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1940 append_pattern_def_seq (stmt_vinfo, def_stmt);
1944 var1 = vect_recog_temp_ssa_var (type, NULL);
1945 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1946 ? LSHIFT_EXPR : RSHIFT_EXPR,
1947 oprnd0, def);
1948 append_pattern_def_seq (stmt_vinfo, def_stmt);
1950 var2 = vect_recog_temp_ssa_var (type, NULL);
1951 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1952 ? RSHIFT_EXPR : LSHIFT_EXPR,
1953 oprnd0, def2);
1954 append_pattern_def_seq (stmt_vinfo, def_stmt);
1956 /* Pattern detected. */
1957 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
1959 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1960 var = vect_recog_temp_ssa_var (type, NULL);
1961 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1963 stmts->safe_push (last_stmt);
1964 return pattern_stmt;
1967 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1968 vectorized:
1970 type a_t;
1971 TYPE b_T, res_T;
1973 S1 a_t = ;
1974 S2 b_T = ;
1975 S3 res_T = b_T op a_t;
1977 where type 'TYPE' is a type with different size than 'type',
1978 and op is <<, >> or rotate.
1980 Also detect cases:
1982 type a_t;
1983 TYPE b_T, c_T, res_T;
1985 S0 c_T = ;
1986 S1 a_t = (type) c_T;
1987 S2 b_T = ;
1988 S3 res_T = b_T op a_t;
1990 Input/Output:
1992 * STMTS: Contains a stmt from which the pattern search begins,
1993 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1994 with a shift/rotate which has the same type on both operands: in the
1995 second case just b_T op c_T, in the first case with an added cast
1996 from a_t to TYPE in STMT_VINFO_PATTERN_DEF_SEQ.
1998 Output:
2000 * TYPE_OUT: The type of the output of this pattern.
2002 * Return value: A new stmt that will be used to replace the shift/rotate
2003 S3 stmt. */
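/* As a small illustration (the variable names below are made up for the
   sketch): with

     unsigned char a_t;
     unsigned int b_T, res_T;
     ...
     res_T = b_T << a_t;

   the shift count has a different size than the shifted value, so no
   vector/vector shift matches directly.  The pattern instead shifts by a
   same-sized copy of the count, conceptually

     unsigned int a_T = (unsigned int) a_t;
     res_T = b_T << a_T;

   with the cast recorded in STMT_VINFO_PATTERN_DEF_SEQ as described
   above.  */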
2005 static gimple *
2006 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts, tree *type_out)
2008 gimple *last_stmt = stmts->pop ();
2009 tree oprnd0, oprnd1, lhs, var;
2010 gimple *pattern_stmt;
2011 enum tree_code rhs_code;
2012 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2013 vec_info *vinfo = stmt_vinfo->vinfo;
2015 if (!is_gimple_assign (last_stmt))
2016 return NULL;
2018 rhs_code = gimple_assign_rhs_code (last_stmt);
2019 switch (rhs_code)
2021 case LSHIFT_EXPR:
2022 case RSHIFT_EXPR:
2023 case LROTATE_EXPR:
2024 case RROTATE_EXPR:
2025 break;
2026 default:
2027 return NULL;
2030 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2031 return NULL;
2033 lhs = gimple_assign_lhs (last_stmt);
2034 oprnd0 = gimple_assign_rhs1 (last_stmt);
2035 oprnd1 = gimple_assign_rhs2 (last_stmt);
2036 if (TREE_CODE (oprnd0) != SSA_NAME
2037 || TREE_CODE (oprnd1) != SSA_NAME
2038 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2039 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2040 || TYPE_PRECISION (TREE_TYPE (lhs))
2041 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2042 return NULL;
2044 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2045 if (!def_vinfo)
2046 return NULL;
2048 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2049 if (*type_out == NULL_TREE)
2050 return NULL;
2052 tree def = NULL_TREE;
2053 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2054 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo)
2055 && def_stmt
2056 && gimple_assign_cast_p (def_stmt))
2058 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2059 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2060 && TYPE_PRECISION (TREE_TYPE (rhs1))
2061 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2063 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2064 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2065 def = rhs1;
2066 else
2068 tree mask
2069 = build_low_bits_mask (TREE_TYPE (rhs1),
2070 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2071 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2072 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2073 stmt_vec_info new_stmt_info
2074 = new_stmt_vec_info (def_stmt, vinfo);
2075 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2076 STMT_VINFO_VECTYPE (new_stmt_info)
2077 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2078 new_pattern_def_seq (stmt_vinfo, def_stmt);
2083 if (def == NULL_TREE)
2085 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2086 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2087 new_pattern_def_seq (stmt_vinfo, def_stmt);
2090 /* Pattern detected. */
2091 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2093 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2094 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2095 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2097 stmts->safe_push (last_stmt);
2098 return pattern_stmt;
2101 /* Return true iff the target has a vector optab implementing the operation
2102 CODE on type VECTYPE. */
2104 static bool
2105 target_has_vecop_for_code (tree_code code, tree vectype)
2107 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2108 return voptab
2109 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2112 /* Verify that the target has optabs of VECTYPE to perform all the steps
2113 needed by the multiplication-by-immediate synthesis algorithm described by
2114 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2115 present. Return true iff the target supports all the steps. */
2117 static bool
2118 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2119 tree vectype, bool synth_shift_p)
2121 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2122 return false;
2124 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2125 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2127 if (var == negate_variant
2128 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2129 return false;
2131 /* If we must synthesize shifts with additions make sure that vector
2132 addition is available. */
2133 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2134 return false;
2136 for (int i = 1; i < alg->ops; i++)
2138 switch (alg->op[i])
2140 case alg_shift:
2141 break;
2142 case alg_add_t_m2:
2143 case alg_add_t2_m:
2144 case alg_add_factor:
2145 if (!supports_vplus)
2146 return false;
2147 break;
2148 case alg_sub_t_m2:
2149 case alg_sub_t2_m:
2150 case alg_sub_factor:
2151 if (!supports_vminus)
2152 return false;
2153 break;
2154 case alg_unknown:
2155 case alg_m:
2156 case alg_zero:
2157 case alg_impossible:
2158 return false;
2159 default:
2160 gcc_unreachable ();
2164 return true;
2167 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2168 putting the final result in DEST. Append all statements but the last into
2169 VINFO. Return the last statement. */
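/* For example (an illustrative sketch): a left shift by 3 becomes three
   self-additions,

     t1 = op + op;        op << 1
     t2 = t1 + t1;        op << 2
     dest = t2 + t2;      op << 3

   where the first two statements are appended to the pattern sequence
   and the last one is returned to the caller.  */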
2171 static gimple *
2172 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2173 stmt_vec_info vinfo)
2175 HOST_WIDE_INT i;
2176 tree itype = TREE_TYPE (op);
2177 tree prev_res = op;
2178 gcc_assert (amnt >= 0);
2179 for (i = 0; i < amnt; i++)
2181 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2182 : dest;
2183 gimple *stmt
2184 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2185 prev_res = tmp_var;
2186 if (i < amnt - 1)
2187 append_pattern_def_seq (vinfo, stmt);
2188 else
2189 return stmt;
2191 gcc_unreachable ();
2192 return NULL;
2195 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2196 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2197 the process if necessary. Append the resulting assignment statements
2198 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2199 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2200 left shifts using additions. */
2202 static tree
2203 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2204 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2206 if (integer_zerop (op2)
2207 && (code == LSHIFT_EXPR
2208 || code == PLUS_EXPR))
2210 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2211 return op1;
2214 gimple *stmt;
2215 tree itype = TREE_TYPE (op1);
2216 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2218 if (code == LSHIFT_EXPR
2219 && synth_shift_p)
2221 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2222 stmt_vinfo);
2223 append_pattern_def_seq (stmt_vinfo, stmt);
2224 return tmp_var;
2227 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2228 append_pattern_def_seq (stmt_vinfo, stmt);
2229 return tmp_var;
2232 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2233 and simple arithmetic operations to be vectorized. Record the statements
2234 produced in STMT_VINFO and return the last statement in the sequence or
2235 NULL if it's not possible to synthesize such a multiplication.
2236 This function mirrors the behavior of expand_mult_const in expmed.c but
2237 works on tree-ssa form. */
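/* For instance (a hedged sketch; the exact steps are whatever
   choose_mult_variant selects for the target and constant), a
   multiplication by 9 may be synthesized as

     t1 = op << 3;
     result = t1 + op;

   and a multiplication by 15 as

     t1 = op << 4;
     result = t1 - op;

   For a signed type that does not wrap on overflow, the function below
   performs these steps in the corresponding unsigned type and casts the
   result back, as its comments explain.  */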
2239 static gimple *
2240 vect_synth_mult_by_constant (tree op, tree val,
2241 stmt_vec_info stmt_vinfo)
2243 tree itype = TREE_TYPE (op);
2244 machine_mode mode = TYPE_MODE (itype);
2245 struct algorithm alg;
2246 mult_variant variant;
2247 if (!tree_fits_shwi_p (val))
2248 return NULL;
2250 /* Multiplication synthesis by shifts, adds and subs can introduce
2251 signed overflow where the original operation didn't. Perform the
2252 operations on an unsigned type and cast back to avoid this.
2253 In the future we may want to relax this for synthesis algorithms
2254 that we can prove do not cause unexpected overflow. */
2255 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2257 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2259 /* Targets that don't support vector shifts but support vector additions
2260 can synthesize shifts that way. */
2261 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2263 HOST_WIDE_INT hwval = tree_to_shwi (val);
2264 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2265 The vectorizer's benefit analysis will decide whether it's beneficial
2266 to do this. */
2267 bool possible = choose_mult_variant (mode, hwval, &alg,
2268 &variant, MAX_COST);
2269 if (!possible)
2270 return NULL;
2272 tree vectype = get_vectype_for_scalar_type (multtype);
2274 if (!vectype
2275 || !target_supports_mult_synth_alg (&alg, variant,
2276 vectype, synth_shift_p))
2277 return NULL;
2279 tree accumulator;
2281 /* Clear out the sequence of statements so we can populate it below. */
2282 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2283 gimple *stmt = NULL;
2285 if (cast_to_unsigned_p)
2287 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2288 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2289 append_pattern_def_seq (stmt_vinfo, stmt);
2290 op = tmp_op;
2293 if (alg.op[0] == alg_zero)
2294 accumulator = build_int_cst (multtype, 0);
2295 else
2296 accumulator = op;
2298 bool needs_fixup = (variant == negate_variant)
2299 || (variant == add_variant);
2301 for (int i = 1; i < alg.ops; i++)
2303 tree shft_log = build_int_cst (multtype, alg.log[i]);
2304 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2305 tree tmp_var = NULL_TREE;
2307 switch (alg.op[i])
2309 case alg_shift:
2310 if (synth_shift_p)
2311 stmt
2312 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2313 stmt_vinfo);
2314 else
2315 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2316 shft_log);
2317 break;
2318 case alg_add_t_m2:
2319 tmp_var
2320 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2321 stmt_vinfo, synth_shift_p);
2322 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2323 tmp_var);
2324 break;
2325 case alg_sub_t_m2:
2326 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2327 shft_log, stmt_vinfo,
2328 synth_shift_p);
2329 /* In some algorithms the first step involves zeroing the
2330 accumulator. If subtracting from such an accumulator
2331 just emit the negation directly. */
2332 if (integer_zerop (accumulator))
2333 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2334 else
2335 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2336 tmp_var);
2337 break;
2338 case alg_add_t2_m:
2339 tmp_var
2340 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2341 stmt_vinfo, synth_shift_p);
2342 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2343 break;
2344 case alg_sub_t2_m:
2345 tmp_var
2346 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2347 stmt_vinfo, synth_shift_p);
2348 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2349 break;
2350 case alg_add_factor:
2351 tmp_var
2352 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2353 stmt_vinfo, synth_shift_p);
2354 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2355 tmp_var);
2356 break;
2357 case alg_sub_factor:
2358 tmp_var
2359 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2360 stmt_vinfo, synth_shift_p);
2361 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2362 accumulator);
2363 break;
2364 default:
2365 gcc_unreachable ();
2367 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2368 but rather return it directly. */
2370 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2371 append_pattern_def_seq (stmt_vinfo, stmt);
2372 accumulator = accum_tmp;
2374 if (variant == negate_variant)
2376 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2377 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2378 accumulator = accum_tmp;
2379 if (cast_to_unsigned_p)
2380 append_pattern_def_seq (stmt_vinfo, stmt);
2382 else if (variant == add_variant)
2384 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2385 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2386 accumulator = accum_tmp;
2387 if (cast_to_unsigned_p)
2388 append_pattern_def_seq (stmt_vinfo, stmt);
2390 /* Move back to a signed type if needed. */
2391 if (cast_to_unsigned_p)
2393 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2394 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2397 return stmt;
2400 /* Detect multiplication by constant and convert it into a sequence of
2401 shifts, additions, subtractions and negations. We reuse the
2402 choose_mult_variant algorithms from expmed.c.
2404 Input/Output:
2406 STMTS: Contains a stmt from which the pattern search begins,
2407 i.e. the mult stmt.
2409 Output:
2411 * TYPE_OUT: The type of the output of this pattern.
2413 * Return value: A new stmt that will be used to replace
2414 the multiplication. */
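/* For example (an illustrative sketch): on a target without a native
   vector multiply for the element type, a loop body such as

     for (i = 0; i < n; i++)
       a[i] = b[i] * 5;

   can still be vectorized by expanding the multiplication into

     t = b[i] << 2;
     a[i] = t + b[i];

   whereas if the target does provide a vector MULT_EXPR the pattern
   returns NULL and the statement is left untouched.  */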
2416 static gimple *
2417 vect_recog_mult_pattern (vec<gimple *> *stmts, tree *type_out)
2419 gimple *last_stmt = stmts->pop ();
2420 tree oprnd0, oprnd1, vectype, itype;
2421 gimple *pattern_stmt;
2422 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2424 if (!is_gimple_assign (last_stmt))
2425 return NULL;
2427 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2428 return NULL;
2430 oprnd0 = gimple_assign_rhs1 (last_stmt);
2431 oprnd1 = gimple_assign_rhs2 (last_stmt);
2432 itype = TREE_TYPE (oprnd0);
2434 if (TREE_CODE (oprnd0) != SSA_NAME
2435 || TREE_CODE (oprnd1) != INTEGER_CST
2436 || !INTEGRAL_TYPE_P (itype)
2437 || !type_has_mode_precision_p (itype))
2438 return NULL;
2440 vectype = get_vectype_for_scalar_type (itype);
2441 if (vectype == NULL_TREE)
2442 return NULL;
2444 /* If the target can handle vectorized multiplication natively,
2445 don't attempt to optimize this. */
2446 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2447 if (mul_optab != unknown_optab)
2449 machine_mode vec_mode = TYPE_MODE (vectype);
2450 int icode = (int) optab_handler (mul_optab, vec_mode);
2451 if (icode != CODE_FOR_nothing)
2452 return NULL;
2455 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2456 if (!pattern_stmt)
2457 return NULL;
2459 /* Pattern detected. */
2460 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2462 stmts->safe_push (last_stmt);
2463 *type_out = vectype;
2465 return pattern_stmt;
2468 /* Detect a signed division by a constant that wouldn't be
2469 otherwise vectorized:
2471 type a_t, b_t;
2473 S1 a_t = b_t / N;
2475 where type 'type' is an integral type and N is a constant.
2477 Similarly handle modulo by a constant:
2479 S4 a_t = b_t % N;
2481 Input/Output:
2483 * STMTS: Contains a stmt from which the pattern search begins,
2484 i.e. the division stmt. S1 is replaced, if N is a power
2485 of two constant and the type is signed, by:
2486 S3 y_t = b_t < 0 ? N - 1 : 0;
2487 S2 x_t = b_t + y_t;
2488 S1' a_t = x_t >> log2 (N);
2490 S4 is replaced, if N is a power of two constant and
2491 the type is signed, by (where *_T temporaries have unsigned type):
2492 S9 y_T = b_t < 0 ? -1U : 0U;
2493 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2494 S7 z_t = (type) z_T;
2495 S6 w_t = b_t + z_t;
2496 S5 x_t = w_t & (N - 1);
2497 S4' a_t = x_t - z_t;
2499 Output:
2501 * TYPE_OUT: The type of the output of this pattern.
2503 * Return value: A new stmt that will be used to replace the division
2504 S1 or modulo S4 stmt. */
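/* Concretely (a sketch assuming a 32-bit signed type and N == 4), the
   division S1 becomes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   and the modulo S4 becomes

     y_T = b_t < 0 ? -1U : 0U;
     z_T = y_T >> 30;
     z_t = (type) z_T;
     w_t = b_t + z_t;
     x_t = w_t & 3;
     a_t = x_t - z_t;

   where 30 is sizeof (type) * CHAR_BIT - log2 (N).  Both sequences
   round towards zero, matching TRUNC_DIV_EXPR and TRUNC_MOD_EXPR.  */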
2506 static gimple *
2507 vect_recog_divmod_pattern (vec<gimple *> *stmts, tree *type_out)
2509 gimple *last_stmt = stmts->pop ();
2510 tree oprnd0, oprnd1, vectype, itype, cond;
2511 gimple *pattern_stmt, *def_stmt;
2512 enum tree_code rhs_code;
2513 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2514 vec_info *vinfo = stmt_vinfo->vinfo;
2515 optab optab;
2516 tree q;
2517 int dummy_int, prec;
2518 stmt_vec_info def_stmt_vinfo;
2520 if (!is_gimple_assign (last_stmt))
2521 return NULL;
2523 rhs_code = gimple_assign_rhs_code (last_stmt);
2524 switch (rhs_code)
2526 case TRUNC_DIV_EXPR:
2527 case TRUNC_MOD_EXPR:
2528 break;
2529 default:
2530 return NULL;
2533 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2534 return NULL;
2536 oprnd0 = gimple_assign_rhs1 (last_stmt);
2537 oprnd1 = gimple_assign_rhs2 (last_stmt);
2538 itype = TREE_TYPE (oprnd0);
2539 if (TREE_CODE (oprnd0) != SSA_NAME
2540 || TREE_CODE (oprnd1) != INTEGER_CST
2541 || TREE_CODE (itype) != INTEGER_TYPE
2542 || !type_has_mode_precision_p (itype))
2543 return NULL;
2545 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2546 vectype = get_vectype_for_scalar_type (itype);
2547 if (vectype == NULL_TREE)
2548 return NULL;
2550 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2552 /* If the target can handle vectorized division or modulo natively,
2553 don't attempt to optimize this, since native division is likely
2554 to give smaller code. */
2555 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2556 if (optab != unknown_optab)
2558 machine_mode vec_mode = TYPE_MODE (vectype);
2559 int icode = (int) optab_handler (optab, vec_mode);
2560 if (icode != CODE_FOR_nothing)
2561 return NULL;
2565 prec = TYPE_PRECISION (itype);
2566 if (integer_pow2p (oprnd1))
2568 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2569 return NULL;
2571 /* Pattern detected. */
2572 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2574 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2575 build_int_cst (itype, 0));
2576 if (rhs_code == TRUNC_DIV_EXPR)
2578 tree var = vect_recog_temp_ssa_var (itype, NULL);
2579 tree shift;
2580 def_stmt
2581 = gimple_build_assign (var, COND_EXPR, cond,
2582 fold_build2 (MINUS_EXPR, itype, oprnd1,
2583 build_int_cst (itype, 1)),
2584 build_int_cst (itype, 0));
2585 new_pattern_def_seq (stmt_vinfo, def_stmt);
2586 var = vect_recog_temp_ssa_var (itype, NULL);
2587 def_stmt
2588 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2589 gimple_assign_lhs (def_stmt));
2590 append_pattern_def_seq (stmt_vinfo, def_stmt);
2592 shift = build_int_cst (itype, tree_log2 (oprnd1));
2593 pattern_stmt
2594 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2595 RSHIFT_EXPR, var, shift);
2597 else
2599 tree signmask;
2600 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2601 if (compare_tree_int (oprnd1, 2) == 0)
2603 signmask = vect_recog_temp_ssa_var (itype, NULL);
2604 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2605 build_int_cst (itype, 1),
2606 build_int_cst (itype, 0));
2607 append_pattern_def_seq (stmt_vinfo, def_stmt);
2609 else
2611 tree utype
2612 = build_nonstandard_integer_type (prec, 1);
2613 tree vecutype = get_vectype_for_scalar_type (utype);
2614 tree shift
2615 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2616 - tree_log2 (oprnd1));
2617 tree var = vect_recog_temp_ssa_var (utype, NULL);
2619 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2620 build_int_cst (utype, -1),
2621 build_int_cst (utype, 0));
2622 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2623 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2624 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2625 append_pattern_def_seq (stmt_vinfo, def_stmt);
2626 var = vect_recog_temp_ssa_var (utype, NULL);
2627 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2628 gimple_assign_lhs (def_stmt),
2629 shift);
2630 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2631 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2632 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2633 append_pattern_def_seq (stmt_vinfo, def_stmt);
2634 signmask = vect_recog_temp_ssa_var (itype, NULL);
2635 def_stmt
2636 = gimple_build_assign (signmask, NOP_EXPR, var);
2637 append_pattern_def_seq (stmt_vinfo, def_stmt);
2639 def_stmt
2640 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2641 PLUS_EXPR, oprnd0, signmask);
2642 append_pattern_def_seq (stmt_vinfo, def_stmt);
2643 def_stmt
2644 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2645 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2646 fold_build2 (MINUS_EXPR, itype, oprnd1,
2647 build_int_cst (itype, 1)));
2648 append_pattern_def_seq (stmt_vinfo, def_stmt);
2650 pattern_stmt
2651 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2652 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2653 signmask);
2656 stmts->safe_push (last_stmt);
2658 *type_out = vectype;
2659 return pattern_stmt;
2662 if (prec > HOST_BITS_PER_WIDE_INT
2663 || integer_zerop (oprnd1))
2664 return NULL;
2666 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2667 return NULL;
2669 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2671 if (TYPE_UNSIGNED (itype))
2673 unsigned HOST_WIDE_INT mh, ml;
2674 int pre_shift, post_shift;
2675 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2676 & GET_MODE_MASK (itype_mode));
2677 tree t1, t2, t3, t4;
2679 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2680 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2681 return NULL;
2683 /* Find a suitable multiplier and right shift count
2684 instead of multiplying with D. */
2685 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2687 /* If the suggested multiplier is more than SIZE bits, we can do better
2688 for even divisors, using an initial right shift. */
2689 if (mh != 0 && (d & 1) == 0)
2691 pre_shift = ctz_or_zero (d);
2692 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2693 &ml, &post_shift, &dummy_int);
2694 gcc_assert (!mh);
2696 else
2697 pre_shift = 0;
2699 if (mh != 0)
2701 if (post_shift - 1 >= prec)
2702 return NULL;
2704 /* t1 = oprnd0 h* ml;
2705 t2 = oprnd0 - t1;
2706 t3 = t2 >> 1;
2707 t4 = t1 + t3;
2708 q = t4 >> (post_shift - 1); */
2709 t1 = vect_recog_temp_ssa_var (itype, NULL);
2710 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2711 build_int_cst (itype, ml));
2712 append_pattern_def_seq (stmt_vinfo, def_stmt);
2714 t2 = vect_recog_temp_ssa_var (itype, NULL);
2715 def_stmt
2716 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2717 append_pattern_def_seq (stmt_vinfo, def_stmt);
2719 t3 = vect_recog_temp_ssa_var (itype, NULL);
2720 def_stmt
2721 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2722 append_pattern_def_seq (stmt_vinfo, def_stmt);
2724 t4 = vect_recog_temp_ssa_var (itype, NULL);
2725 def_stmt
2726 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2728 if (post_shift != 1)
2730 append_pattern_def_seq (stmt_vinfo, def_stmt);
2732 q = vect_recog_temp_ssa_var (itype, NULL);
2733 pattern_stmt
2734 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2735 build_int_cst (itype, post_shift - 1));
2737 else
2739 q = t4;
2740 pattern_stmt = def_stmt;
2743 else
2745 if (pre_shift >= prec || post_shift >= prec)
2746 return NULL;
2748 /* t1 = oprnd0 >> pre_shift;
2749 t2 = t1 h* ml;
2750 q = t2 >> post_shift; */
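/* For instance (illustrative values, assuming a 32-bit unsigned type):
   division by 3 needs no pre-shift and uses ml = 0xAAAAAAAB with
   post_shift = 1, i.e.

     t2 = oprnd0 h* 0xAAAAAAAB;
     q = t2 >> 1;

   since (n * 0xAAAAAAABULL) >> 33 equals n / 3 for every 32-bit n.  */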
2751 if (pre_shift)
2753 t1 = vect_recog_temp_ssa_var (itype, NULL);
2754 def_stmt
2755 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2756 build_int_cst (NULL, pre_shift));
2757 append_pattern_def_seq (stmt_vinfo, def_stmt);
2759 else
2760 t1 = oprnd0;
2762 t2 = vect_recog_temp_ssa_var (itype, NULL);
2763 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2764 build_int_cst (itype, ml));
2766 if (post_shift)
2768 append_pattern_def_seq (stmt_vinfo, def_stmt);
2770 q = vect_recog_temp_ssa_var (itype, NULL);
2771 def_stmt
2772 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2773 build_int_cst (itype, post_shift));
2775 else
2776 q = t2;
2778 pattern_stmt = def_stmt;
2781 else
2783 unsigned HOST_WIDE_INT ml;
2784 int post_shift;
2785 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2786 unsigned HOST_WIDE_INT abs_d;
2787 bool add = false;
2788 tree t1, t2, t3, t4;
2790 /* Give up for -1. */
2791 if (d == -1)
2792 return NULL;
2794 /* Since d might be INT_MIN, we have to cast to
2795 unsigned HOST_WIDE_INT before negating to avoid
2796 undefined signed overflow. */
2797 abs_d = (d >= 0
2798 ? (unsigned HOST_WIDE_INT) d
2799 : - (unsigned HOST_WIDE_INT) d);
2801 /* n rem d = n rem -d */
2802 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2804 d = abs_d;
2805 oprnd1 = build_int_cst (itype, abs_d);
2807 else if (HOST_BITS_PER_WIDE_INT >= prec
2808 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2809 /* This case is not handled correctly below. */
2810 return NULL;
2812 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2813 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2815 add = true;
2816 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2818 if (post_shift >= prec)
2819 return NULL;
2821 /* t1 = oprnd0 h* ml; */
2822 t1 = vect_recog_temp_ssa_var (itype, NULL);
2823 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2824 build_int_cst (itype, ml));
2826 if (add)
2828 /* t2 = t1 + oprnd0; */
2829 append_pattern_def_seq (stmt_vinfo, def_stmt);
2830 t2 = vect_recog_temp_ssa_var (itype, NULL);
2831 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2833 else
2834 t2 = t1;
2836 if (post_shift)
2838 /* t3 = t2 >> post_shift; */
2839 append_pattern_def_seq (stmt_vinfo, def_stmt);
2840 t3 = vect_recog_temp_ssa_var (itype, NULL);
2841 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2842 build_int_cst (itype, post_shift));
2844 else
2845 t3 = t2;
2847 wide_int oprnd0_min, oprnd0_max;
2848 int msb = 1;
2849 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2851 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2852 msb = 0;
2853 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2854 msb = -1;
2857 if (msb == 0 && d >= 0)
2859 /* q = t3; */
2860 q = t3;
2861 pattern_stmt = def_stmt;
2863 else
2865 /* t4 = oprnd0 >> (prec - 1);
2866 or if we know from VRP that oprnd0 >= 0
2867 t4 = 0;
2868 or if we know from VRP that oprnd0 < 0
2869 t4 = -1; */
2870 append_pattern_def_seq (stmt_vinfo, def_stmt);
2871 t4 = vect_recog_temp_ssa_var (itype, NULL);
2872 if (msb != 1)
2873 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2874 build_int_cst (itype, msb));
2875 else
2876 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2877 build_int_cst (itype, prec - 1));
2878 append_pattern_def_seq (stmt_vinfo, def_stmt);
2880 /* q = t3 - t4; or q = t4 - t3; */
2881 q = vect_recog_temp_ssa_var (itype, NULL);
2882 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2883 d < 0 ? t3 : t4);
2887 if (rhs_code == TRUNC_MOD_EXPR)
2889 tree r, t1;
2891 /* We divided. Now finish by:
2892 t1 = q * oprnd1;
2893 r = oprnd0 - t1; */
2894 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2896 t1 = vect_recog_temp_ssa_var (itype, NULL);
2897 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2898 append_pattern_def_seq (stmt_vinfo, def_stmt);
2900 r = vect_recog_temp_ssa_var (itype, NULL);
2901 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2904 /* Pattern detected. */
2905 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2907 stmts->safe_push (last_stmt);
2909 *type_out = vectype;
2910 return pattern_stmt;
2913 /* Function vect_recog_mixed_size_cond_pattern
2915 Try to find the following pattern:
2917 type x_t, y_t;
2918 TYPE a_T, b_T, c_T;
2919 loop:
2920 S1 a_T = x_t CMP y_t ? b_T : c_T;
2922 where type 'TYPE' is an integral type which has different size
2923 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2924 than 'type', the constants need to fit into an integer type
2925 with the same width as 'type') or results of conversion from 'type'.
2927 Input:
2929 * LAST_STMT: A stmt from which the pattern search begins.
2931 Output:
2933 * TYPE_OUT: The type of the output of this pattern.
2935 * Return value: A new stmt that will be used to replace the pattern.
2936 Additionally a def_stmt is added.
2938 a_it = x_t CMP y_t ? b_it : c_it;
2939 a_T = (TYPE) a_it; */
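/* For example (an illustrative sketch): with

     short x_t, y_t;
     long a_T;
     ...
     a_T = x_t < y_t ? 3 : 7;

   the comparison works on short elements while the result is long, so
   their vectors have different numbers of lanes.  The pattern performs
   the COND_EXPR in a short-sized integer type first and then widens:

     a_it = x_t < y_t ? 3 : 7;
     a_T = (long) a_it;

   which is only done when the constants (here 3 and 7) fit in the
   narrower type.  */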
2941 static gimple *
2942 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_out)
2944 gimple *last_stmt = (*stmts)[0];
2945 tree cond_expr, then_clause, else_clause;
2946 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2947 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2948 gimple *pattern_stmt, *def_stmt;
2949 vec_info *vinfo = stmt_vinfo->vinfo;
2950 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2951 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2952 bool promotion;
2953 tree comp_scalar_type;
2955 if (!is_gimple_assign (last_stmt)
2956 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2957 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2958 return NULL;
2960 cond_expr = gimple_assign_rhs1 (last_stmt);
2961 then_clause = gimple_assign_rhs2 (last_stmt);
2962 else_clause = gimple_assign_rhs3 (last_stmt);
2964 if (!COMPARISON_CLASS_P (cond_expr))
2965 return NULL;
2967 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2968 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2969 if (comp_vectype == NULL_TREE)
2970 return NULL;
2972 type = gimple_expr_type (last_stmt);
2973 if (types_compatible_p (type, comp_scalar_type)
2974 || ((TREE_CODE (then_clause) != INTEGER_CST
2975 || TREE_CODE (else_clause) != INTEGER_CST)
2976 && !INTEGRAL_TYPE_P (comp_scalar_type))
2977 || !INTEGRAL_TYPE_P (type))
2978 return NULL;
2980 if ((TREE_CODE (then_clause) != INTEGER_CST
2981 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2982 &def_stmt0, &promotion))
2983 || (TREE_CODE (else_clause) != INTEGER_CST
2984 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2985 &def_stmt1, &promotion)))
2986 return NULL;
2988 if (orig_type0 && orig_type1
2989 && !types_compatible_p (orig_type0, orig_type1))
2990 return NULL;
2992 if (orig_type0)
2994 if (!types_compatible_p (orig_type0, comp_scalar_type))
2995 return NULL;
2996 then_clause = gimple_assign_rhs1 (def_stmt0);
2997 itype = orig_type0;
3000 if (orig_type1)
3002 if (!types_compatible_p (orig_type1, comp_scalar_type))
3003 return NULL;
3004 else_clause = gimple_assign_rhs1 (def_stmt1);
3005 itype = orig_type1;
3009 HOST_WIDE_INT cmp_mode_size
3010 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3012 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3013 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3014 return NULL;
3016 vectype = get_vectype_for_scalar_type (type);
3017 if (vectype == NULL_TREE)
3018 return NULL;
3020 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3021 return NULL;
3023 if (itype == NULL_TREE)
3024 itype = build_nonstandard_integer_type (cmp_mode_size,
3025 TYPE_UNSIGNED (type));
3027 if (itype == NULL_TREE
3028 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3029 return NULL;
3031 vecitype = get_vectype_for_scalar_type (itype);
3032 if (vecitype == NULL_TREE)
3033 return NULL;
3035 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3036 return NULL;
3038 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3040 if ((TREE_CODE (then_clause) == INTEGER_CST
3041 && !int_fits_type_p (then_clause, itype))
3042 || (TREE_CODE (else_clause) == INTEGER_CST
3043 && !int_fits_type_p (else_clause, itype)))
3044 return NULL;
3047 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3048 COND_EXPR, unshare_expr (cond_expr),
3049 fold_convert (itype, then_clause),
3050 fold_convert (itype, else_clause));
3051 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3052 NOP_EXPR, gimple_assign_lhs (def_stmt));
3054 new_pattern_def_seq (stmt_vinfo, def_stmt);
3055 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3056 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3057 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3058 *type_out = vectype;
3060 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3062 return pattern_stmt;
3066 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3067 true if bool VAR can and should be optimized that way. Assume it shouldn't
3068 in case it's a result of a comparison which can be directly vectorized into
3069 a vector comparison. Fills in STMTS with all stmts visited during the
3070 walk. */
3072 static bool
3073 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3075 tree rhs1;
3076 enum tree_code rhs_code;
3078 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3079 if (!def_stmt_info)
3080 return false;
3082 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3083 if (!def_stmt)
3084 return false;
3086 if (stmts.contains (def_stmt))
3087 return true;
3089 rhs1 = gimple_assign_rhs1 (def_stmt);
3090 rhs_code = gimple_assign_rhs_code (def_stmt);
3091 switch (rhs_code)
3093 case SSA_NAME:
3094 if (! check_bool_pattern (rhs1, vinfo, stmts))
3095 return false;
3096 break;
3098 CASE_CONVERT:
3099 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3100 return false;
3101 if (! check_bool_pattern (rhs1, vinfo, stmts))
3102 return false;
3103 break;
3105 case BIT_NOT_EXPR:
3106 if (! check_bool_pattern (rhs1, vinfo, stmts))
3107 return false;
3108 break;
3110 case BIT_AND_EXPR:
3111 case BIT_IOR_EXPR:
3112 case BIT_XOR_EXPR:
3113 if (! check_bool_pattern (rhs1, vinfo, stmts)
3114 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3115 return false;
3116 break;
3118 default:
3119 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3121 tree vecitype, comp_vectype;
3123 /* If the comparison can throw, then is_gimple_condexpr will be
3124 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3125 if (stmt_could_throw_p (def_stmt))
3126 return false;
3128 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3129 if (comp_vectype == NULL_TREE)
3130 return false;
3132 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3133 if (mask_type
3134 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3135 return false;
3137 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3139 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3140 tree itype
3141 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3142 vecitype = get_vectype_for_scalar_type (itype);
3143 if (vecitype == NULL_TREE)
3144 return false;
3146 else
3147 vecitype = comp_vectype;
3148 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3149 return false;
3151 else
3152 return false;
3153 break;
3156 bool res = stmts.add (def_stmt);
3157 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3158 gcc_assert (!res);
3160 return true;
3164 /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
3165 previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast
3166 stmt to STMT_INFO's pattern sequence. Return the result of the cast. */
3168 static tree
3169 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3171 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3172 NOP_EXPR, var);
3173 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3174 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3175 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3176 append_pattern_def_seq (stmt_info, cast_stmt);
3177 return gimple_assign_lhs (cast_stmt);
3180 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3181 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3182 type, OUT_TYPE is the desired final integer type of the whole pattern.
3183 STMT_INFO is the info of the pattern root and is where pattern stmts should
3184 be associated with. DEFS is a map of pattern defs. */
3186 static void
3187 adjust_bool_pattern (tree var, tree out_type,
3188 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3190 gimple *stmt = SSA_NAME_DEF_STMT (var);
3191 enum tree_code rhs_code, def_rhs_code;
3192 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3193 location_t loc;
3194 gimple *pattern_stmt, *def_stmt;
3195 tree trueval = NULL_TREE;
3197 rhs1 = gimple_assign_rhs1 (stmt);
3198 rhs2 = gimple_assign_rhs2 (stmt);
3199 rhs_code = gimple_assign_rhs_code (stmt);
3200 loc = gimple_location (stmt);
3201 switch (rhs_code)
3203 case SSA_NAME:
3204 CASE_CONVERT:
3205 irhs1 = *defs.get (rhs1);
3206 itype = TREE_TYPE (irhs1);
3207 pattern_stmt
3208 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3209 SSA_NAME, irhs1);
3210 break;
3212 case BIT_NOT_EXPR:
3213 irhs1 = *defs.get (rhs1);
3214 itype = TREE_TYPE (irhs1);
3215 pattern_stmt
3216 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3217 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3218 break;
3220 case BIT_AND_EXPR:
3221 /* Try to optimize x = y & (a < b ? 1 : 0); into
3222 x = (a < b ? y : 0);
3224 E.g. for:
3225 bool a_b, b_b, c_b;
3226 TYPE d_T;
3228 S1 a_b = x1 CMP1 y1;
3229 S2 b_b = x2 CMP2 y2;
3230 S3 c_b = a_b & b_b;
3231 S4 d_T = (TYPE) c_b;
3233 we would normally emit:
3235 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3236 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3237 S3' c_T = a_T & b_T;
3238 S4' d_T = c_T;
3240 but we can save one stmt by using the
3241 result of one of the COND_EXPRs in the other COND_EXPR and leave
3242 the BIT_AND_EXPR stmt out:
3244 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3245 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3246 S4' d_T = c_T;
3248 At least when VEC_COND_EXPR is implemented using masks,
3249 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3250 computes the comparison mask and ANDs it, in one case with an
3251 all-ones vector, in the other case with a vector register.
3252 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3253 often more expensive. */
3254 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3255 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3256 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3258 irhs1 = *defs.get (rhs1);
3259 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3260 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3261 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3263 rhs_code = def_rhs_code;
3264 rhs1 = def_rhs1;
3265 rhs2 = gimple_assign_rhs2 (def_stmt);
3266 trueval = irhs1;
3267 goto do_compare;
3269 else
3270 irhs2 = *defs.get (rhs2);
3271 goto and_ior_xor;
3273 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3274 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3275 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3277 irhs2 = *defs.get (rhs2);
3278 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3279 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3280 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3282 rhs_code = def_rhs_code;
3283 rhs1 = def_rhs1;
3284 rhs2 = gimple_assign_rhs2 (def_stmt);
3285 trueval = irhs2;
3286 goto do_compare;
3288 else
3289 irhs1 = *defs.get (rhs1);
3290 goto and_ior_xor;
3292 /* FALLTHRU */
3293 case BIT_IOR_EXPR:
3294 case BIT_XOR_EXPR:
3295 irhs1 = *defs.get (rhs1);
3296 irhs2 = *defs.get (rhs2);
3297 and_ior_xor:
3298 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3299 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3301 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3302 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3303 int out_prec = TYPE_PRECISION (out_type);
3304 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3305 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3306 stmt_info);
3307 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3308 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3309 stmt_info);
3310 else
3312 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3313 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3316 itype = TREE_TYPE (irhs1);
3317 pattern_stmt
3318 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3319 rhs_code, irhs1, irhs2);
3320 break;
3322 default:
3323 do_compare:
3324 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3325 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3326 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3327 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3328 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3330 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3331 itype
3332 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3334 else
3335 itype = TREE_TYPE (rhs1);
3336 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3337 if (trueval == NULL_TREE)
3338 trueval = build_int_cst (itype, 1);
3339 else
3340 gcc_checking_assert (useless_type_conversion_p (itype,
3341 TREE_TYPE (trueval)));
3342 pattern_stmt
3343 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3344 COND_EXPR, cond_expr, trueval,
3345 build_int_cst (itype, 0));
3346 break;
3349 gimple_set_location (pattern_stmt, loc);
3350 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3351 pattern def seq stmts instead of just letting auto-detection do
3352 its work? */
3353 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3354 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3355 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3356 append_pattern_def_seq (stmt_info, pattern_stmt);
3357 defs.put (var, gimple_assign_lhs (pattern_stmt));
3360 /* Comparison function to qsort a vector of gimple stmts after UID. */
3362 static int
3363 sort_after_uid (const void *p1, const void *p2)
3365 const gimple *stmt1 = *(const gimple * const *)p1;
3366 const gimple *stmt2 = *(const gimple * const *)p2;
3367 return gimple_uid (stmt1) - gimple_uid (stmt2);
3370 /* Create pattern stmts for all stmts participating in the bool pattern
3371 specified by BOOL_STMT_SET and its root STMT with the desired type
3372 OUT_TYPE. Return the def of the pattern root. */
3374 static tree
3375 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3376 tree out_type, gimple *stmt)
3378 /* Gather original stmts in the bool pattern in their order of appearance
3379 in the IL. */
3380 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3381 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3382 i != bool_stmt_set.end (); ++i)
3383 bool_stmts.quick_push (*i);
3384 bool_stmts.qsort (sort_after_uid);
3386 /* Now process them in that order, producing pattern stmts. */
3387 hash_map <tree, tree> defs;
3388 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3389 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3390 out_type, vinfo_for_stmt (stmt), defs);
3392 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3393 gimple *pattern_stmt
3394 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3395 return gimple_assign_lhs (pattern_stmt);
3398 /* Helper for search_type_for_mask. */
3400 static tree
3401 search_type_for_mask_1 (tree var, vec_info *vinfo,
3402 hash_map<gimple *, tree> &cache)
3404 tree rhs1;
3405 enum tree_code rhs_code;
3406 tree res = NULL_TREE, res2;
3408 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3409 return NULL_TREE;
3411 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3412 if (!def_stmt_info)
3413 return NULL_TREE;
3415 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3416 if (!def_stmt)
3417 return NULL_TREE;
3419 tree *c = cache.get (def_stmt);
3420 if (c)
3421 return *c;
3423 rhs_code = gimple_assign_rhs_code (def_stmt);
3424 rhs1 = gimple_assign_rhs1 (def_stmt);
3426 switch (rhs_code)
3428 case SSA_NAME:
3429 case BIT_NOT_EXPR:
3430 CASE_CONVERT:
3431 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3432 break;
3434 case BIT_AND_EXPR:
3435 case BIT_IOR_EXPR:
3436 case BIT_XOR_EXPR:
3437 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3438 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3439 cache);
3440 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3441 res = res2;
3442 break;
3444 default:
3445 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3447 tree comp_vectype, mask_type;
3449 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3451 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3452 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3453 vinfo, cache);
3454 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3455 res = res2;
3456 break;
3459 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3460 if (comp_vectype == NULL_TREE)
3462 res = NULL_TREE;
3463 break;
3466 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3467 if (!mask_type
3468 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3470 res = NULL_TREE;
3471 break;
3474 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3475 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3477 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3478 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3480 else
3481 res = TREE_TYPE (rhs1);
3485 cache.put (def_stmt, res);
3486 return res;
3489 /* Return the proper type for converting bool VAR into
3490 an integer value or NULL_TREE if no such type exists.
3491 The type is chosen so that the converted value's vector type has the
3492 same number of elements as VAR's vector type. */
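/* For instance (a sketch only): if VAR is the boolean result of comparing
   two 32-bit integers, the chosen type is a 32-bit unsigned integer; a
   vector of that type has the same number of lanes as the comparison
   operands' vector, so the converted mask lines up element-wise with the
   data it selects.  */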
3494 static tree
3495 search_type_for_mask (tree var, vec_info *vinfo)
3497 hash_map<gimple *, tree> cache;
3498 return search_type_for_mask_1 (var, vinfo, cache);
3501 /* Function vect_recog_bool_pattern
3503 Try to find pattern like following:
3505 bool a_b, b_b, c_b, d_b, e_b;
3506 TYPE f_T;
3507 loop:
3508 S1 a_b = x1 CMP1 y1;
3509 S2 b_b = x2 CMP2 y2;
3510 S3 c_b = a_b & b_b;
3511 S4 d_b = x3 CMP3 y3;
3512 S5 e_b = c_b | d_b;
3513 S6 f_T = (TYPE) e_b;
3515 where type 'TYPE' is an integral type. Or a similar pattern
3516 ending in
3518 S6 f_Y = e_b ? r_Y : s_Y;
3520 as results from if-conversion of a complex condition.
3522 Input:
3524 * LAST_STMT: A stmt at the end from which the pattern
3525 search begins, i.e. cast of a bool to
3526 an integer type.
3528 Output:
3530 * TYPE_OUT: The type of the output of this pattern.
3532 * Return value: A new stmt that will be used to replace the pattern.
3534 Assuming the size of TYPE is the same as the size of all comparisons
3535 (otherwise some casts would be added where needed), for the above
3536 sequence we create related pattern stmts:
3537 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3538 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3539 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3540 S5' e_T = c_T | d_T;
3541 S6' f_T = e_T;
3543 Instead of the above S3' we could emit:
3544 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3545 S3' c_T = a_T | b_T;
3546 but the above is more efficient. */
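/* A concrete instance of the conversion case (illustrative only):

     bool a_b = x1 < y1;
     bool b_b = x2 < y2;
     int f_i = (int) (a_b & b_b);

   becomes, when check_bool_pattern succeeds,

     int a_T = x1 < y1 ? 1 : 0;
     int c_T = x2 < y2 ? a_T : 0;
     int f_i = c_T;

   reusing the first COND_EXPR result as the true value of the second,
   exactly as S3' above does.  */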
3548 static gimple *
3549 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_out)
3551 gimple *last_stmt = stmts->pop ();
3552 enum tree_code rhs_code;
3553 tree var, lhs, rhs, vectype;
3554 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3555 stmt_vec_info new_stmt_info;
3556 vec_info *vinfo = stmt_vinfo->vinfo;
3557 gimple *pattern_stmt;
3559 if (!is_gimple_assign (last_stmt))
3560 return NULL;
3562 var = gimple_assign_rhs1 (last_stmt);
3563 lhs = gimple_assign_lhs (last_stmt);
3565 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3566 return NULL;
3568 hash_set<gimple *> bool_stmts;
3570 rhs_code = gimple_assign_rhs_code (last_stmt);
3571 if (CONVERT_EXPR_CODE_P (rhs_code))
3573 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3574 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3575 return NULL;
3576 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3577 if (vectype == NULL_TREE)
3578 return NULL;
3580 if (check_bool_pattern (var, vinfo, bool_stmts))
3582 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3583 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3584 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3585 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3586 else
3587 pattern_stmt
3588 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3590 else
3592 tree type = search_type_for_mask (var, vinfo);
3593 tree cst0, cst1, tmp;
3595 if (!type)
3596 return NULL;
3598 /* We may use the cond directly with a narrowed type, to avoid
3599 multiple cond exprs followed by result packing, and instead
3600 perform a single cond with a packed mask. In the case
3601 of widening it is better to emit the cond first and then extract
3602 the results. */
3603 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3604 type = TREE_TYPE (lhs);
3606 cst0 = build_int_cst (type, 0);
3607 cst1 = build_int_cst (type, 1);
3608 tmp = vect_recog_temp_ssa_var (type, NULL);
3609 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3611 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3613 tree new_vectype = get_vectype_for_scalar_type (type);
3614 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3615 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3616 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3617 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3619 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3620 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3624 *type_out = vectype;
3625 stmts->safe_push (last_stmt);
3626 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3628 return pattern_stmt;
3630 else if (rhs_code == COND_EXPR
3631 && TREE_CODE (var) == SSA_NAME)
3633 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3634 if (vectype == NULL_TREE)
3635 return NULL;
3637 /* Build a scalar type for the boolean result that when
3638 vectorized matches the vector type of the result in
3639 size and number of elements. */
3640 unsigned prec
3641 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3642 TYPE_VECTOR_SUBPARTS (vectype));
3644 tree type
3645 = build_nonstandard_integer_type (prec,
3646 TYPE_UNSIGNED (TREE_TYPE (var)));
3647 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3648 return NULL;
3650 if (!check_bool_pattern (var, vinfo, bool_stmts))
3651 return NULL;
3653 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3655 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3656 pattern_stmt
3657 = gimple_build_assign (lhs, COND_EXPR,
3658 build2 (NE_EXPR, boolean_type_node,
3659 rhs, build_int_cst (type, 0)),
3660 gimple_assign_rhs2 (last_stmt),
3661 gimple_assign_rhs3 (last_stmt));
3662 *type_out = vectype;
3663 stmts->safe_push (last_stmt);
3664 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3666 return pattern_stmt;
3668 else if (rhs_code == SSA_NAME
3669 && STMT_VINFO_DATA_REF (stmt_vinfo))
3671 stmt_vec_info pattern_stmt_info;
3672 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3673 gcc_assert (vectype != NULL_TREE);
3674 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3675 return NULL;
3677 if (check_bool_pattern (var, vinfo, bool_stmts))
3678 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3679 else
3681 tree type = search_type_for_mask (var, vinfo);
3682 tree cst0, cst1, new_vectype;
3684 if (!type)
3685 return NULL;
3687 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3688 type = TREE_TYPE (vectype);
3690 cst0 = build_int_cst (type, 0);
3691 cst1 = build_int_cst (type, 1);
3692 new_vectype = get_vectype_for_scalar_type (type);
3694 rhs = vect_recog_temp_ssa_var (type, NULL);
3695 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3697 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3698 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3699 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3700 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3703 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3704 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3706 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3707 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3708 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3709 rhs = rhs2;
3711 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3712 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3713 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3714 STMT_VINFO_DATA_REF (pattern_stmt_info)
3715 = STMT_VINFO_DATA_REF (stmt_vinfo);
3716 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3717 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3718 *type_out = vectype;
3719 stmts->safe_push (last_stmt);
3720 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3722 return pattern_stmt;
3724 else
3725 return NULL;
3729 /* A helper for vect_recog_mask_conversion_pattern. Build
3730 conversion of MASK to a type suitable for masking VECTYPE.
3731 The built statement gets the required vectype and is appended to
3732 the pattern sequence of STMT_VINFO.
3734 Return the converted mask. */
3736 static tree
3737 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3738 vec_info *vinfo)
3740 gimple *stmt;
3741 tree masktype, tmp;
3742 stmt_vec_info new_stmt_info;
3744 masktype = build_same_sized_truth_vector_type (vectype);
3745 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3746 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3747 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3748 set_vinfo_for_stmt (stmt, new_stmt_info);
3749 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3750 append_pattern_def_seq (stmt_vinfo, stmt);
3752 return tmp;
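/* A minimal illustration of the helper above (a sketch, assuming MASK was
   defined by a comparison of 32-bit ints and VECTYPE has 64-bit double
   elements): it would append a single pattern definition statement roughly
   of the form

     tmp_1 = (<signed-boolean:64>) mask_2;

   and record on it the same-sized truth vector type of VECTYPE, so that the
   widening or narrowing of the mask is expanded when the pattern sequence
   is vectorized.  */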
3756 /* Function vect_recog_mask_conversion_pattern
3758 Try to find statements which require boolean type
3759 conversion. Additional conversion statements are
3760 added to handle such cases. For example:
3762 bool m_1, m_2, m_3;
3763 int i_4, i_5;
3764 double d_6, d_7;
3765 char c_1, c_2, c_3;
3767 S1 m_1 = i_4 > i_5;
3768 S2 m_2 = d_6 < d_7;
3769 S3 m_3 = m_1 & m_2;
3770 S4 c_1 = m_3 ? c_2 : c_3;
3772 Will be transformed into:
3774 S1 m_1 = i_4 > i_5;
3775 S2 m_2 = d_6 < d_7;
3776 S3'' m_2' = (_Bool[bitsize=32])m_2
3777 S3' m_3' = m_1 & m_2';
3778 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3779 S4' c_1' = m_3'' ? c_2 : c_3; */
3781 static gimple *
3782 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_out)
3784 gimple *last_stmt = stmts->pop ();
3785 enum tree_code rhs_code;
3786 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3787 tree vectype1, vectype2;
3788 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3789 stmt_vec_info pattern_stmt_info;
3790 vec_info *vinfo = stmt_vinfo->vinfo;
3792 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3793 if (is_gimple_call (last_stmt)
3794 && gimple_call_internal_p (last_stmt)
3795 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3796 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3798 gcall *pattern_stmt;
3799 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3801 if (load)
3803 lhs = gimple_call_lhs (last_stmt);
3804 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3806 else
3808 rhs2 = gimple_call_arg (last_stmt, 3);
3809 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3812 rhs1 = gimple_call_arg (last_stmt, 2);
3813 rhs1_type = search_type_for_mask (rhs1, vinfo);
3814 if (!rhs1_type)
3815 return NULL;
3816 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3818 if (!vectype1 || !vectype2
3819 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3820 TYPE_VECTOR_SUBPARTS (vectype2)))
3821 return NULL;
3823 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3825 if (load)
3827 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3828 pattern_stmt
3829 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3830 gimple_call_arg (last_stmt, 0),
3831 gimple_call_arg (last_stmt, 1),
3832 tmp);
3833 gimple_call_set_lhs (pattern_stmt, lhs);
3835 else
3836 pattern_stmt
3837 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3838 gimple_call_arg (last_stmt, 0),
3839 gimple_call_arg (last_stmt, 1),
3840 tmp,
3841 gimple_call_arg (last_stmt, 3));
3843 gimple_call_set_nothrow (pattern_stmt, true);
3845 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3846 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3847 STMT_VINFO_DATA_REF (pattern_stmt_info)
3848 = STMT_VINFO_DATA_REF (stmt_vinfo);
3849 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3850 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3851 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
3852 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
3854 *type_out = vectype1;
3855 stmts->safe_push (last_stmt);
3856 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3858 return pattern_stmt;
3861 if (!is_gimple_assign (last_stmt))
3862 return NULL;
3864 gimple *pattern_stmt;
3865 lhs = gimple_assign_lhs (last_stmt);
3866 rhs1 = gimple_assign_rhs1 (last_stmt);
3867 rhs_code = gimple_assign_rhs_code (last_stmt);
3869 /* Check for cond expression requiring mask conversion. */
3870 if (rhs_code == COND_EXPR)
3872 /* vect_recog_mixed_size_cond_pattern could apply.
3873 Do nothing then. */
3874 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3875 return NULL;
3877 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3879 if (TREE_CODE (rhs1) == SSA_NAME)
3881 rhs1_type = search_type_for_mask (rhs1, vinfo);
3882 if (!rhs1_type)
3883 return NULL;
3885 else if (COMPARISON_CLASS_P (rhs1))
3887 /* Check whether we're comparing scalar booleans and (if so)
3888 whether a better mask type exists than the mask associated
3889 with boolean-sized elements. This avoids unnecessary packs
3890 and unpacks if the booleans are set from comparisons of
3891 wider types. E.g. in:
3893 int x1, x2, x3, x4, y1, y2;
3895 bool b1 = (x1 == x2);
3896 bool b2 = (x3 == x4);
3897 ... = b1 == b2 ? y1 : y2;
3899 it is better for b1 and b2 to use the mask type associated
3900 with int elements rather than bool (byte) elements. */
3901 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3902 if (!rhs1_type)
3903 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3905 else
3906 return NULL;
3908 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3910 if (!vectype1 || !vectype2)
3911 return NULL;
3913 /* Continue if a conversion is needed. Also continue if we have
3914 a comparison whose vector type would normally be different from
3915 VECTYPE2 when considered in isolation. In that case we'll
3916 replace the comparison with an SSA name (so that we can record
3917 its vector type) and behave as though the comparison was an SSA
3918 name from the outset. */
3919 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3920 TYPE_VECTOR_SUBPARTS (vectype2))
3921 && (TREE_CODE (rhs1) == SSA_NAME
3922 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
3923 return NULL;
3925 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
3926 in place, we can handle it in vectorizable_condition. This avoids
3927 unnecessary promotion stmts and increased vectorization factor. */
3928 if (COMPARISON_CLASS_P (rhs1)
3929 && INTEGRAL_TYPE_P (rhs1_type)
3930 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
3931 TYPE_VECTOR_SUBPARTS (vectype2)))
3933 gimple *dummy;
3934 enum vect_def_type dt;
3935 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
3936 &dummy, &dt)
3937 && dt == vect_external_def
3938 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
3939 &dummy, &dt)
3940 && (dt == vect_external_def
3941 || dt == vect_constant_def))
3943 tree wide_scalar_type = build_nonstandard_integer_type
3944 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
3945 TYPE_UNSIGNED (rhs1_type));
3946 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
3947 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
3948 return NULL;
3952 /* If rhs1 is a comparison we need to move it into a
3953 separate statement. */
3954 if (TREE_CODE (rhs1) != SSA_NAME)
3956 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3957 pattern_stmt = gimple_build_assign (tmp, rhs1);
3958 rhs1 = tmp;
3960 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3961 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3962 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3963 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3966 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
3967 TYPE_VECTOR_SUBPARTS (vectype2)))
3968 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3969 else
3970 tmp = rhs1;
3972 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3973 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3974 gimple_assign_rhs2 (last_stmt),
3975 gimple_assign_rhs3 (last_stmt));
3977 *type_out = vectype1;
3978 stmts->safe_push (last_stmt);
3979 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3981 return pattern_stmt;
3984 /* Now check for binary boolean operations requiring conversion for
3985 one of the operands. */
3986 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
3987 return NULL;
3989 if (rhs_code != BIT_IOR_EXPR
3990 && rhs_code != BIT_XOR_EXPR
3991 && rhs_code != BIT_AND_EXPR
3992 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
3993 return NULL;
3995 rhs2 = gimple_assign_rhs2 (last_stmt);
3997 rhs1_type = search_type_for_mask (rhs1, vinfo);
3998 rhs2_type = search_type_for_mask (rhs2, vinfo);
4000 if (!rhs1_type || !rhs2_type
4001 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4002 return NULL;
4004 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4006 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4007 if (!vectype1)
4008 return NULL;
4009 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4011 else
4013 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4014 if (!vectype1)
4015 return NULL;
4016 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4019 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4020 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4022 *type_out = vectype1;
4023 stmts->safe_push (last_stmt);
4024 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4026 return pattern_stmt;
4029 /* STMT is a load or store. If the load or store is conditional, return
4030 the boolean condition under which it occurs, otherwise return null. */
4032 static tree
4033 vect_get_load_store_mask (gimple *stmt)
4035 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4037 gcc_assert (gimple_assign_single_p (def_assign));
4038 return NULL_TREE;
4041 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4043 internal_fn ifn = gimple_call_internal_fn (def_call);
4044 int mask_index = internal_fn_mask_index (ifn);
4045 return gimple_call_arg (def_call, mask_index);
4048 gcc_unreachable ();
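/* A hedged example of the above: for a plain load such as

     x_1 = *p_2;

   the statement is a single assignment and NULL_TREE is returned, while for
   a masked store call of the assumed form

     .MASK_STORE (p_2, align_3, mask_4, val_5);

   internal_fn_mask_index gives the position of mask_4, which is what gets
   returned.  */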
4051 /* Return the scalar offset type that an internal gather/scatter function
4052 should use. GS_INFO describes the gather/scatter operation. */
4054 static tree
4055 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4057 tree offset_type = TREE_TYPE (gs_info->offset);
4058 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4060 /* Enforced by vect_check_gather_scatter. */
4061 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4062 gcc_assert (element_bits >= offset_bits);
4064 /* If the offset is narrower than the elements, extend it according
4065 to its sign. */
4066 if (element_bits > offset_bits)
4067 return build_nonstandard_integer_type (element_bits,
4068 TYPE_UNSIGNED (offset_type));
4070 return offset_type;
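/* For example (a sketch, not tied to a particular target): gathering 64-bit
   doubles with a 32-bit signed int offset gives element_bits == 64 and
   offset_bits == 32, so a signed 64-bit integer type is returned; if the
   offset already has 64 bits, its own type is returned unchanged.  */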
4073 /* Return MASK if MASK is suitable for masking an operation on vectors
4074 of type VECTYPE, otherwise convert it into such a form and return
4075 the result. Associate any conversion statements with STMT_INFO's
4076 pattern. */
4078 static tree
4079 vect_convert_mask_for_vectype (tree mask, tree vectype,
4080 stmt_vec_info stmt_info, vec_info *vinfo)
4082 tree mask_type = search_type_for_mask (mask, vinfo);
4083 if (mask_type)
4085 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4086 if (mask_vectype
4087 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4088 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4089 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4091 return mask;
4094 /* Return the equivalent of:
4096 fold_convert (TYPE, VALUE)
4098 with the expectation that the operation will be vectorized.
4099 If new statements are needed, add them as pattern statements
4100 to STMT_INFO. */
4102 static tree
4103 vect_add_conversion_to_pattern (tree type, tree value,
4104 stmt_vec_info stmt_info,
4105 vec_info *vinfo)
4107 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4108 return value;
4110 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4111 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4112 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4113 set_vinfo_for_stmt (conversion, new_stmt_info);
4114 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4115 append_pattern_def_seq (stmt_info, conversion);
4116 return new_value;
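/* Sketch of the effect, under the assumption that TYPE is a 64-bit integer
   type and VALUE a 32-bit SSA name: a pattern statement roughly of the form

     patt_1 = (long int) value_2;

   is appended to STMT_INFO's pattern definition sequence with the vector
   type of TYPE, and patt_1 is returned; when the types already agree,
   VALUE is returned as-is.  */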
4119 /* Try to convert STMT into a call to a gather load or scatter store
4120 internal function. Return the final statement on success and set
4121 *TYPE_OUT to the vector type being loaded or stored.
4123 This function only handles gathers and scatters that were recognized
4124 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4126 static gimple *
4127 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4128 tree *type_out)
4130 /* Currently we only support this for loop vectorization. */
4131 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4132 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4133 if (!loop_vinfo)
4134 return NULL;
4136 /* Make sure that we're looking at a gather load or scatter store. */
4137 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4138 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4139 return NULL;
4141 /* Get the boolean that controls whether the load or store happens.
4142 This is null if the operation is unconditional. */
4143 tree mask = vect_get_load_store_mask (stmt);
4145 /* Make sure that the target supports an appropriate internal
4146 function for the gather/scatter operation. */
4147 gather_scatter_info gs_info;
4148 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4149 || gs_info.decl)
4150 return NULL;
4152 /* Convert the mask to the right form. */
4153 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4154 if (mask)
4155 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4156 loop_vinfo);
4158 /* Get the invariant base and non-invariant offset, converting the
4159 latter to the same width as the vector elements. */
4160 tree base = gs_info.base;
4161 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4162 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4163 last_stmt_info, loop_vinfo);
4165 /* Build the new pattern statement. */
4166 tree scale = size_int (gs_info.scale);
4167 gcall *pattern_stmt;
4168 if (DR_IS_READ (dr))
4170 if (mask != NULL)
4171 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4172 offset, scale, mask);
4173 else
4174 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4175 offset, scale);
4176 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4177 gimple_call_set_lhs (pattern_stmt, load_lhs);
4179 else
4181 tree rhs = vect_get_store_rhs (stmt);
4182 if (mask != NULL)
4183 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4184 base, offset, scale, rhs,
4185 mask);
4186 else
4187 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4188 base, offset, scale, rhs);
4190 gimple_call_set_nothrow (pattern_stmt, true);
4192 /* Copy across relevant vectorization info and associate DR with the
4193 new pattern statement instead of the original statement. */
4194 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4195 loop_vinfo);
4196 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4197 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4198 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4199 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4200 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4201 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4203 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4204 *type_out = vectype;
4205 vect_pattern_detected ("gather/scatter pattern", stmt);
4207 return pattern_stmt;
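/* A hedged end-to-end example of the transformation above: a conditional
   read recognized as a gather, e.g.

     if (m_1)
       x_2 = data[idx_3];

   would (assuming the target advertises the corresponding internal
   function) be replaced by a pattern call roughly of the form

     x_4 = .MASK_GATHER_LOAD (&data, idx_3', scale, mask_5);

   where idx_3' is the offset widened to the element width and mask_5 the
   mask converted by vect_convert_mask_for_vectype.  */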
4210 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4212 static gimple *
4213 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_out)
4215 gimple *last_stmt = stmts->pop ();
4216 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4217 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4218 last_stmt_info,
4219 type_out);
4220 if (pattern_stmt)
4221 stmts->safe_push (last_stmt);
4222 return pattern_stmt;
4225 typedef gimple *(*vect_recog_func_ptr) (vec<gimple *> *, tree *);
4227 struct vect_recog_func
4229 vect_recog_func_ptr fn;
4230 const char *name;
4233 /* Note that ordering matters - the first pattern matching on a stmt is
4234 taken, which means usually the more complex one needs to precede the
4235 less complex ones (widen_sum only after dot_prod or sad for example). */
4236 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4237 { vect_recog_widen_mult_pattern, "widen_mult" },
4238 { vect_recog_dot_prod_pattern, "dot_prod" },
4239 { vect_recog_sad_pattern, "sad" },
4240 { vect_recog_widen_sum_pattern, "widen_sum" },
4241 { vect_recog_pow_pattern, "pow" },
4242 { vect_recog_widen_shift_pattern, "widen_shift" },
4243 { vect_recog_over_widening_pattern, "over_widening" },
4244 { vect_recog_rotate_pattern, "rotate" },
4245 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4246 { vect_recog_divmod_pattern, "divmod" },
4247 { vect_recog_mult_pattern, "mult" },
4248 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4249 { vect_recog_bool_pattern, "bool" },
4250 /* This must come before mask conversion, and includes the parts
4251 of mask conversion that are needed for gather and scatter
4252 internal functions. */
4253 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4254 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4257 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4259 /* Mark statements that are involved in a pattern. */
4261 static inline void
4262 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4263 tree pattern_vectype)
4265 stmt_vec_info pattern_stmt_info, def_stmt_info;
4266 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4267 vec_info *vinfo = orig_stmt_info->vinfo;
4268 gimple *def_stmt;
4270 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4271 if (pattern_stmt_info == NULL)
4273 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4274 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4276 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4278 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4279 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4280 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4281 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4282 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4283 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4284 if (gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info))
4285 for (gimple_stmt_iterator si = gsi_start (def_seq);
4286 !gsi_end_p (si); gsi_next (&si))
4288 def_stmt = gsi_stmt (si);
4289 def_stmt_info = vinfo_for_stmt (def_stmt);
4290 if (def_stmt_info == NULL)
4292 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4293 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4295 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4296 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4297 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4298 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4299 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4303 /* Function vect_pattern_recog_1
4305 Input:
4306 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4307 computation pattern.
4308 STMT: A stmt from which the pattern search should start.
4310 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4311 a sequence of statements that has the same functionality and can be
4312 used to replace STMT. It returns the last statement in the sequence
4313 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4314 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4315 statement, having first checked that the target supports the new operation
4316 in that type.
4318 This function also does some bookkeeping, as explained in the documentation
4319 for vect_pattern_recog. */
4321 static bool
4322 vect_pattern_recog_1 (vect_recog_func *recog_func,
4323 gimple_stmt_iterator si,
4324 vec<gimple *> *stmts_to_replace)
4326 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4327 stmt_vec_info stmt_info;
4328 loop_vec_info loop_vinfo;
4329 tree pattern_vectype;
4330 int i;
4332 stmts_to_replace->truncate (0);
4333 stmts_to_replace->quick_push (stmt);
4334 pattern_stmt = recog_func->fn (stmts_to_replace, &pattern_vectype);
4335 if (!pattern_stmt)
4337 /* Clear any related-stmt info that analysis might have noted for
4338 the stmts that are to be replaced. */
4339 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4340 && (unsigned) i < stmts_to_replace->length ();
4341 i++)
4343 stmt_info = vinfo_for_stmt (stmt);
4344 STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
4346 return false;
4349 stmt = stmts_to_replace->last ();
4350 stmt_info = vinfo_for_stmt (stmt);
4351 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4352 gcc_assert (pattern_vectype);
4354 /* Found a vectorizable pattern. */
4355 if (dump_enabled_p ())
4357 dump_printf_loc (MSG_NOTE, vect_location,
4358 "%s pattern recognized: ", recog_func->name);
4359 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4362 /* Mark the stmts that are involved in the pattern. */
4363 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4365 /* Patterns cannot be vectorized using SLP, because they change the order of
4366 computation. */
4367 if (loop_vinfo)
4369 unsigned ix, ix2;
4370 gimple **elem_ptr;
4371 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4372 elem_ptr, *elem_ptr == stmt);
4375 /* It is possible that additional pattern stmts are created and inserted in
4376 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4377 relevant statements. */
4378 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4379 && (unsigned) i < (stmts_to_replace->length () - 1);
4380 i++)
4382 stmt_info = vinfo_for_stmt (stmt);
4383 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4384 if (dump_enabled_p ())
4386 dump_printf_loc (MSG_NOTE, vect_location,
4387 "additional pattern stmt: ");
4388 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4391 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4394 return true;
4398 /* Function vect_pattern_recog
4400 Input:
4401 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4402 computation idioms.
4404 Output - for each computation idiom that is detected we create a new stmt
4405 that provides the same functionality and that can be vectorized. We
4406 also record some information in the struct_stmt_info of the relevant
4407 stmts, as explained below:
4409 At the entry to this function we have the following stmts, with the
4410 following initial value in the STMT_VINFO fields:
4412 stmt in_pattern_p related_stmt vec_stmt
4413 S1: a_i = .... - - -
4414 S2: a_2 = ..use(a_i).. - - -
4415 S3: a_1 = ..use(a_2).. - - -
4416 S4: a_0 = ..use(a_1).. - - -
4417 S5: ... = ..use(a_0).. - - -
4419 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4420 represented by a single stmt. We then:
4421 - create a new stmt S6 equivalent to the pattern (the stmt is not
4422 inserted into the code)
4423 - fill in the STMT_VINFO fields as follows:
4425 in_pattern_p related_stmt vec_stmt
4426 S1: a_i = .... - - -
4427 S2: a_2 = ..use(a_i).. - - -
4428 S3: a_1 = ..use(a_2).. - - -
4429 S4: a_0 = ..use(a_1).. true S6 -
4430 '---> S6: a_new = .... - S4 -
4431 S5: ... = ..use(a_0).. - - -
4433 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4434 to each other through the RELATED_STMT field).
4436 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4437 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4438 remain irrelevant unless used by stmts other than S4.
4440 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4441 (because they are marked as irrelevant). It will vectorize S6, and record
4442 a pointer to the new vector stmt VS6 from S6 (as usual).
4443 S4 will be skipped, and S5 will be vectorized as usual:
4445 in_pattern_p related_stmt vec_stmt
4446 S1: a_i = .... - - -
4447 S2: a_2 = ..use(a_i).. - - -
4448 S3: a_1 = ..use(a_2).. - - -
4449 > VS6: va_new = .... - - -
4450 S4: a_0 = ..use(a_1).. true S6 VS6
4451 '---> S6: a_new = .... - S4 VS6
4452 > VS5: ... = ..vuse(va_new).. - - -
4453 S5: ... = ..use(a_0).. - - -
4455 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4456 elsewhere), and we'll end up with:
4458 VS6: va_new = ....
4459 VS5: ... = ..vuse(va_new)..
4461 In case of more than one pattern statements, e.g., widen-mult with
4462 intermediate type:
4464 S1 a_t = ;
4465 S2 a_T = (TYPE) a_t;
4466 '--> S3: a_it = (interm_type) a_t;
4467 S4 prod_T = a_T * CONST;
4468 '--> S5: prod_T' = a_it w* CONST;
4470 there may be other users of a_T outside the pattern. In that case S2 will
4471 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4472 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4473 be recorded in S3. */
4475 void
4476 vect_pattern_recog (vec_info *vinfo)
4478 struct loop *loop;
4479 basic_block *bbs;
4480 unsigned int nbbs;
4481 gimple_stmt_iterator si;
4482 unsigned int i, j;
4483 auto_vec<gimple *, 1> stmts_to_replace;
4485 DUMP_VECT_SCOPE ("vect_pattern_recog");
4487 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4489 loop = LOOP_VINFO_LOOP (loop_vinfo);
4490 bbs = LOOP_VINFO_BBS (loop_vinfo);
4491 nbbs = loop->num_nodes;
4493 /* Scan through the loop stmts, applying the pattern recognition
4494 functions starting at each stmt visited: */
4495 for (i = 0; i < nbbs; i++)
4497 basic_block bb = bbs[i];
4498 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4500 gimple *stmt = gsi_stmt (si);
4501 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4502 if (stmt_info && STMT_VINFO_IN_PATTERN_P (stmt_info))
4503 continue;
4504 /* Scan over all generic vect_recog_xxx_pattern functions. */
4505 for (j = 0; j < NUM_PATTERNS; j++)
4506 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4507 &stmts_to_replace))
4508 break;
4512 else
4514 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4515 for (si = bb_vinfo->region_begin;
4516 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4518 gimple *stmt = gsi_stmt (si);
4519 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4520 if (stmt_info
4521 && (!STMT_VINFO_VECTORIZABLE (stmt_info)
4522 || STMT_VINFO_IN_PATTERN_P (stmt_info)))
4523 continue;
4525 /* Scan over all generic vect_recog_xxx_pattern functions. */
4526 for (j = 0; j < NUM_PATTERNS; j++)
4527 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4528 &stmts_to_replace))
4529 break;