[Ada] Empty CUDA_Global procedures when compiling for host
[official-gcc.git] / gcc / tree-vect-slp-patterns.c
blobb8d09b7832e29689ede832d555e1b6af2c24ce1e
1 /* SLP - Pattern matcher on SLP trees
2 Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "gimple-iterator.h"
36 #include "cfgloop.h"
37 #include "tree-vectorizer.h"
38 #include "langhooks.h"
39 #include "gimple-walk.h"
40 #include "dbgcnt.h"
41 #include "tree-vector-builder.h"
42 #include "vec-perm-indices.h"
43 #include "gimple-fold.h"
44 #include "internal-fn.h"
46 /* SLP Pattern matching mechanism.
48 This extension to the SLP vectorizer allows one to transform the generated SLP
49 tree based on any pattern. The difference between this and the normal vect
50 pattern matcher is that unlike the former, this matcher allows you to match
51 with instructions that do not belong to the same SSA dominator graph.
53 The only requirement that this pattern matcher has is that you are only
54 only allowed to either match an entire group or none.
56 The pattern matcher currently only allows you to perform replacements to
57 internal functions.
59 Once the patterns are matched it is one way, these cannot be undone. It is
60 currently not supported to match patterns recursively.
62 To add a new pattern, implement the vect_pattern class and add the type to
63 slp_patterns.
67 /*******************************************************************************
68 * vect_pattern class
69 ******************************************************************************/
71 /* Default implementation of recognize that performs matching, validation and
72 replacement of nodes but that can be overriden if required. */
74 static bool
75 vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
77 tree vectype = SLP_TREE_VECTYPE (node);
78 if (ifn == IFN_LAST || !vectype)
79 return false;
81 if (dump_enabled_p ())
82 dump_printf_loc (MSG_NOTE, vect_location,
83 "Found %s pattern in SLP tree\n",
84 internal_fn_name (ifn));
86 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_NOTE, vect_location,
90 "Target supports %s vectorization with mode %T\n",
91 internal_fn_name (ifn), vectype);
93 else
95 if (dump_enabled_p ())
97 if (!vectype)
98 dump_printf_loc (MSG_NOTE, vect_location,
99 "Target does not support vector type for %T\n",
100 SLP_TREE_DEF_TYPE (node));
101 else
102 dump_printf_loc (MSG_NOTE, vect_location,
103 "Target does not support %s for vector type "
104 "%T\n", internal_fn_name (ifn), vectype);
106 return false;
108 return true;
111 /*******************************************************************************
112 * General helper types
113 ******************************************************************************/
115 /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116 be matched when looking for expressions that we are interested matching for
117 complex numbers addition and mla. */
119 typedef enum _complex_operation : unsigned {
120 PLUS_PLUS,
121 MINUS_PLUS,
122 PLUS_MINUS,
123 MULT_MULT,
124 CMPLX_NONE
125 } complex_operation_t;
127 /*******************************************************************************
128 * General helper functions
129 ******************************************************************************/
131 /* Helper function of linear_loads_p that checks to see if the load permutation
132 is sequential and in monotonically increasing order of loads with no gaps.
135 static inline complex_perm_kinds_t
136 is_linear_load_p (load_permutation_t loads)
138 if (loads.length() == 0)
139 return PERM_UNKNOWN;
141 unsigned load, i;
142 complex_perm_kinds_t candidates[4]
143 = { PERM_ODDODD
144 , PERM_EVENEVEN
145 , PERM_EVENODD
146 , PERM_ODDEVEN
149 int valid_patterns = 4;
150 FOR_EACH_VEC_ELT (loads, i, load)
152 if (candidates[0] != PERM_UNKNOWN && load != 1)
154 candidates[0] = PERM_UNKNOWN;
155 valid_patterns--;
157 if (candidates[1] != PERM_UNKNOWN && load != 0)
159 candidates[1] = PERM_UNKNOWN;
160 valid_patterns--;
162 if (candidates[2] != PERM_UNKNOWN && load != i)
164 candidates[2] = PERM_UNKNOWN;
165 valid_patterns--;
167 if (candidates[3] != PERM_UNKNOWN
168 && load != (i % 2 == 0 ? i + 1 : i - 1))
170 candidates[3] = PERM_UNKNOWN;
171 valid_patterns--;
174 if (valid_patterns == 0)
175 return PERM_UNKNOWN;
178 for (i = 0; i < sizeof(candidates); i++)
179 if (candidates[i] != PERM_UNKNOWN)
180 return candidates[i];
182 return PERM_UNKNOWN;
185 /* Combine complex_perm_kinds A and B into a new permute kind that describes the
186 resulting operation. */
188 static inline complex_perm_kinds_t
189 vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
191 if (a == b)
192 return a;
194 if (a == PERM_TOP)
195 return b;
197 if (b == PERM_TOP)
198 return a;
200 return PERM_UNKNOWN;
203 /* Check to see if all loads rooted in ROOT are linear. Linearity is
204 defined as having no gaps between values loaded. */
206 static complex_perm_kinds_t
207 linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
209 if (!root)
210 return PERM_UNKNOWN;
212 unsigned i;
213 complex_perm_kinds_t *tmp;
215 if ((tmp = perm_cache->get (root)) != NULL)
216 return *tmp;
218 complex_perm_kinds_t retval = PERM_UNKNOWN;
219 perm_cache->put (root, retval);
221 /* If it's a load node, then just read the load permute. */
222 if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
224 retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
225 perm_cache->put (root, retval);
226 return retval;
228 else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
230 retval = PERM_TOP;
231 perm_cache->put (root, retval);
232 return retval;
235 complex_perm_kinds_t kind = PERM_TOP;
237 slp_tree child;
238 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
240 complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
241 kind = vect_merge_perms (kind, res);
242 /* Unknown and Top are not valid on blends as they produce no permute. */
243 retval = kind;
244 if (kind == PERM_UNKNOWN || kind == PERM_TOP)
245 return retval;
248 retval = kind;
250 perm_cache->put (root, retval);
251 return retval;
255 /* This function attempts to make a node rooted in NODE is linear. If the node
256 if already linear than the node itself is returned in RESULT.
258 If the node is not linear then a new VEC_PERM_EXPR node is created with a
259 lane permute that when applied will make the node linear. If such a
260 permute cannot be created then FALSE is returned from the function.
262 Here linearity is defined as having a sequential, monotically increasing
263 load position inside the load permute generated by the loads reachable from
264 NODE. */
266 static slp_tree
267 vect_build_swap_evenodd_node (slp_tree node)
269 /* Attempt to linearise the permute. */
270 vec<std::pair<unsigned, unsigned> > zipped;
271 zipped.create (SLP_TREE_LANES (node));
273 for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
275 zipped.quick_push (std::make_pair (0, x+1));
276 zipped.quick_push (std::make_pair (0, x));
279 /* Create the new permute node and store it instead. */
280 slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
281 SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
282 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
283 SLP_TREE_CHILDREN (vnode).quick_push (node);
284 SLP_TREE_REF_COUNT (vnode) = 1;
285 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
286 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
287 SLP_TREE_REF_COUNT (node)++;
288 return vnode;
291 /* Checks to see of the expression represented by NODE is a gimple assign with
292 code CODE. */
294 static inline bool
295 vect_match_expression_p (slp_tree node, tree_code code)
297 if (!node
298 || !SLP_TREE_REPRESENTATIVE (node))
299 return false;
301 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
302 if (!is_gimple_assign (expr)
303 || gimple_assign_rhs_code (expr) != code)
304 return false;
306 return true;
309 /* Checks to see if the expression represented by NODE is a call to the internal
310 function FN. */
312 static inline bool
313 vect_match_call_p (slp_tree node, internal_fn fn)
315 if (!node
316 || !SLP_TREE_REPRESENTATIVE (node))
317 return false;
319 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
320 if (!expr
321 || !gimple_call_internal_p (expr, fn))
322 return false;
324 return true;
327 /* Check if the given lane permute in PERMUTES matches an alternating sequence
328 of {even odd even odd ...}. This to account for unrolled loops. Further
329 mode there resulting permute must be linear. */
331 static inline bool
332 vect_check_evenodd_blend (lane_permutation_t &permutes,
333 unsigned even, unsigned odd)
335 if (permutes.length () == 0
336 || permutes.length () % 2 != 0)
337 return false;
339 unsigned val[2] = {even, odd};
340 unsigned seed = 0;
341 for (unsigned i = 0; i < permutes.length (); i++)
342 if (permutes[i].first != val[i % 2]
343 || permutes[i].second != seed++)
344 return false;
346 return true;
349 /* This function will match the two gimple expressions representing NODE1 and
350 NODE2 in parallel and returns the pair operation that represents the two
351 expressions in the two statements.
353 If match is successful then the corresponding complex_operation is
354 returned and the arguments to the two matched operations are returned in OPS.
356 If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
357 from the two nodes alternatingly.
359 If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
361 e.g. the following gimple statements
363 stmt 0 _39 = _37 + _12;
364 stmt 1 _6 = _38 - _36;
366 will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
369 static complex_operation_t
370 vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
371 bool two_operands = true, vec<slp_tree> *ops = NULL)
373 complex_operation_t result = CMPLX_NONE;
375 if (vect_match_expression_p (node1, MINUS_EXPR)
376 && vect_match_expression_p (node2, PLUS_EXPR)
377 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
378 result = MINUS_PLUS;
379 else if (vect_match_expression_p (node1, PLUS_EXPR)
380 && vect_match_expression_p (node2, MINUS_EXPR)
381 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
382 result = PLUS_MINUS;
383 else if (vect_match_expression_p (node1, PLUS_EXPR)
384 && vect_match_expression_p (node2, PLUS_EXPR))
385 result = PLUS_PLUS;
386 else if (vect_match_expression_p (node1, MULT_EXPR)
387 && vect_match_expression_p (node2, MULT_EXPR))
388 result = MULT_MULT;
390 if (result != CMPLX_NONE && ops != NULL)
392 ops->safe_push (node1);
393 ops->safe_push (node2);
395 return result;
398 /* Overload of vect_detect_pair_op that matches against the representative
399 statements in the children of NODE. It is expected that NODE has exactly
400 two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
402 static complex_operation_t
403 vect_detect_pair_op (slp_tree node, bool two_operands = true,
404 vec<slp_tree> *ops = NULL)
406 if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
407 return CMPLX_NONE;
409 if (SLP_TREE_CHILDREN (node).length () != 2)
410 return CMPLX_NONE;
412 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
413 lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
415 return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
416 ops);
419 /*******************************************************************************
420 * complex_pattern class
421 ******************************************************************************/
423 /* SLP Complex Numbers pattern matching.
425 As an example, the following simple loop:
427 double a[restrict N]; double b[restrict N]; double c[restrict N];
429 for (int i=0; i < N; i+=2)
431 c[i] = a[i] - b[i+1];
432 c[i+1] = a[i+1] + b[i];
435 which represents a complex addition on with a rotation of 90* around the
436 argand plane. i.e. if `a` and `b` were complex numbers then this would be the
437 same as `a + (b * I)`.
439 Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
440 both recognized in order for the pattern to work. As an SLP tree this is
441 represented as
443 +--------------------------------+
444 | stmt 0 *_9 = _10; |
445 | stmt 1 *_15 = _16; |
446 +--------------------------------+
450 +--------------------------------+
451 | stmt 0 _10 = _4 - _8; |
452 | stmt 1 _16 = _12 + _14; |
453 | lane permutation { 0[0] 1[1] } |
454 +--------------------------------+
458 +-----+ | | +-----+
459 | | | | | |
460 +-----| { } |<-----+ +----->| { } --------+
461 | | | +------------------| | |
462 | +-----+ | +-----+ |
463 | | | |
464 | | | |
465 | +------|------------------+ |
466 | | | |
467 v v v v
468 +--------------------------+ +--------------------------------+
469 | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
470 | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
471 | load permutation { 1 0 } | | load permutation { 0 1 } |
472 +--------------------------+ +--------------------------------+
474 The pattern matcher allows you to replace both statements 0 and 1 or none at
475 all. Because this operation is a two operands operation the actual nodes
476 being replaced are those in the { } nodes. The actual scalar statements
477 themselves are not replaced or used during the matching but instead the
478 SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
479 replace and match on any number of nodes.
481 Because the pattern matcher matches on the representative statement for the
482 SLP node the case of two_operators it allows you to match the children of the
483 node. This is done using the method `recognize ()`.
487 /* The complex_pattern class contains common code for pattern matchers that work
488 on complex numbers. These provide functionality to allow de-construction and
489 validation of sequences depicting/transforming REAL and IMAG pairs. */
491 class complex_pattern : public vect_pattern
493 protected:
494 auto_vec<slp_tree> m_workset;
495 complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
496 : vect_pattern (node, m_ops, ifn)
498 this->m_workset.safe_push (*node);
501 public:
502 void build (vec_info *);
504 static internal_fn
505 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
506 vec<slp_tree> *);
509 /* Create a replacement pattern statement for each node in m_node and inserts
510 the new statement into m_node as the new representative statement. The old
511 statement is marked as being in a pattern defined by the new statement. The
512 statement is created as call to internal function IFN with m_num_args
513 arguments.
515 Futhermore the new pattern is also added to the vectorization information
516 structure VINFO and the old statement STMT_INFO is marked as unused while
517 the new statement is marked as used and the number of SLP uses of the new
518 statement is incremented.
520 The newly created SLP nodes are marked as SLP only and will be dissolved
521 if SLP is aborted.
523 The newly created gimple call is returned and the BB remains unchanged.
525 This default method is designed to only match against simple operands where
526 all the input and output types are the same.
529 void
530 complex_pattern::build (vec_info *vinfo)
532 stmt_vec_info stmt_info;
534 auto_vec<tree> args;
535 args.create (this->m_num_args);
536 args.quick_grow_cleared (this->m_num_args);
537 slp_tree node;
538 unsigned ix;
539 stmt_vec_info call_stmt_info;
540 gcall *call_stmt = NULL;
542 /* Now modify the nodes themselves. */
543 FOR_EACH_VEC_ELT (this->m_workset, ix, node)
545 /* Calculate the location of the statement in NODE to replace. */
546 stmt_info = SLP_TREE_REPRESENTATIVE (node);
547 stmt_vec_info reduc_def
548 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
549 gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
550 tree lhs_old_stmt = gimple_get_lhs (old_stmt);
551 tree type = TREE_TYPE (lhs_old_stmt);
553 /* Create the argument set for use by gimple_build_call_internal_vec. */
554 for (unsigned i = 0; i < this->m_num_args; i++)
555 args[i] = lhs_old_stmt;
557 /* Create the new pattern statements. */
558 call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
559 tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
560 gimple_call_set_lhs (call_stmt, var);
561 gimple_set_location (call_stmt, gimple_location (old_stmt));
562 gimple_call_set_nothrow (call_stmt, true);
564 /* Adjust the book-keeping for the new and old statements for use during
565 SLP. This is required to get the right VF and statement during SLP
566 analysis. These changes are created after relevancy has been set for
567 the nodes as such we need to manually update them. Any changes will be
568 undone if SLP is cancelled. */
569 call_stmt_info
570 = vinfo->add_pattern_stmt (call_stmt, stmt_info);
572 /* Make sure to mark the representative statement pure_slp and
573 relevant and transfer reduction info. */
574 STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
575 STMT_SLP_TYPE (call_stmt_info) = pure_slp;
576 STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
578 gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
579 STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
580 STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
582 /* Since we are replacing all the statements in the group with the same
583 thing it doesn't really matter. So just set it every time a new stmt
584 is created. */
585 SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
586 SLP_TREE_LANE_PERMUTATION (node).release ();
587 SLP_TREE_CODE (node) = CALL_EXPR;
591 /*******************************************************************************
592 * complex_add_pattern class
593 ******************************************************************************/
595 class complex_add_pattern : public complex_pattern
597 protected:
598 complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
599 : complex_pattern (node, m_ops, ifn)
601 this->m_num_args = 2;
604 public:
605 void build (vec_info *);
606 static internal_fn
607 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
608 vec<slp_tree> *);
610 static vect_pattern*
611 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
613 static vect_pattern*
614 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
616 return new complex_add_pattern (node, m_ops, ifn);
620 /* Perform a replacement of the detected complex add pattern with the new
621 instruction sequences. */
623 void
624 complex_add_pattern::build (vec_info *vinfo)
626 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
628 slp_tree node = this->m_ops[0];
629 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
631 /* First re-arrange the children. */
632 SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
633 SLP_TREE_CHILDREN (*this->m_node)[1] =
634 vect_build_swap_evenodd_node (children[1]);
636 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
637 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
638 vect_free_slp_tree (this->m_ops[0]);
639 vect_free_slp_tree (this->m_ops[1]);
641 complex_pattern::build (vinfo);
644 /* Pattern matcher for trying to match complex addition pattern in SLP tree.
646 If no match is found then IFN is set to IFN_LAST.
647 This function matches the patterns shaped as:
649 c[i] = a[i] - b[i+1];
650 c[i+1] = a[i+1] + b[i];
652 If a match occurred then TRUE is returned, else FALSE. The initial match is
653 expected to be in OP1 and the initial match operands in args0. */
655 internal_fn
656 complex_add_pattern::matches (complex_operation_t op,
657 slp_tree_to_load_perm_map_t *perm_cache,
658 slp_tree *node, vec<slp_tree> *ops)
660 internal_fn ifn = IFN_LAST;
662 /* Find the two components. Rotation in the complex plane will modify
663 the operations:
665 * Rotation 0: + +
666 * Rotation 90: - +
667 * Rotation 180: - -
668 * Rotation 270: + -
670 Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
671 to care about them here. */
672 if (op == MINUS_PLUS)
673 ifn = IFN_COMPLEX_ADD_ROT90;
674 else if (op == PLUS_MINUS)
675 ifn = IFN_COMPLEX_ADD_ROT270;
676 else
677 return ifn;
679 /* verify that there is a permute, otherwise this isn't a pattern we
680 we support. */
681 gcc_assert (ops->length () == 2);
683 vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
685 /* First node must be unpermuted. */
686 if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
687 return IFN_LAST;
689 /* Second node must be permuted. */
690 if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
691 return IFN_LAST;
693 if (!vect_pattern_validate_optab (ifn, *node))
694 return IFN_LAST;
696 return ifn;
699 /* Attempt to recognize a complex add pattern. */
701 vect_pattern*
702 complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
703 slp_tree *node)
705 auto_vec<slp_tree> ops;
706 complex_operation_t op
707 = vect_detect_pair_op (*node, true, &ops);
708 internal_fn ifn
709 = complex_add_pattern::matches (op, perm_cache, node, &ops);
710 if (ifn == IFN_LAST)
711 return NULL;
713 return new complex_add_pattern (node, &ops, ifn);
716 /*******************************************************************************
717 * complex_mul_pattern
718 ******************************************************************************/
720 /* Helper function of that looks for a match in the CHILDth child of NODE. The
721 child used is stored in RES.
723 If the match is successful then ARGS will contain the operands matched
724 and the complex_operation_t type is returned. If match is not successful
725 then CMPLX_NONE is returned and ARGS is left unmodified. */
727 static inline complex_operation_t
728 vect_match_call_complex_mla (slp_tree node, unsigned child,
729 vec<slp_tree> *args = NULL, slp_tree *res = NULL)
731 gcc_assert (child < SLP_TREE_CHILDREN (node).length ());
733 slp_tree data = SLP_TREE_CHILDREN (node)[child];
735 if (res)
736 *res = data;
738 return vect_detect_pair_op (data, false, args);
741 /* Check to see if either of the trees in ARGS are a NEGATE_EXPR. If the first
742 child (args[0]) is a NEGATE_EXPR then NEG_FIRST_P is set to TRUE.
744 If a negate is found then the values in ARGS are reordered such that the
745 negate node is always the second one and the entry is replaced by the child
746 of the negate node. */
748 static inline bool
749 vect_normalize_conj_loc (vec<slp_tree> &args, bool *neg_first_p = NULL)
751 gcc_assert (args.length () == 2);
752 bool neg_found = false;
754 if (vect_match_expression_p (args[0], NEGATE_EXPR))
756 std::swap (args[0], args[1]);
757 neg_found = true;
758 if (neg_first_p)
759 *neg_first_p = true;
761 else if (vect_match_expression_p (args[1], NEGATE_EXPR))
763 neg_found = true;
764 if (neg_first_p)
765 *neg_first_p = false;
768 if (neg_found)
769 args[1] = SLP_TREE_CHILDREN (args[1])[0];
771 return neg_found;
774 /* Helper function to check if PERM is KIND or PERM_TOP. */
776 static inline bool
777 is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind)
779 return perm == kind || perm == PERM_TOP;
782 /* Helper function that checks to see if LEFT_OP and RIGHT_OP are both MULT_EXPR
783 nodes but also that they represent an operation that is either a complex
784 multiplication or a complex multiplication by conjugated value.
786 Of the negation is expected to be in the first half of the tree (As required
787 by an FMS pattern) then NEG_FIRST is true. If the operation is a conjugate
788 operation then CONJ_FIRST_OPERAND is set to indicate whether the first or
789 second operand contains the conjugate operation. */
791 static inline bool
792 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
793 const vec<slp_tree> &left_op,
794 const vec<slp_tree> &right_op,
795 bool neg_first, bool *conj_first_operand,
796 bool fms)
798 /* The presence of a negation indicates that we have either a conjugate or a
799 rotation. We need to distinguish which one. */
800 *conj_first_operand = false;
801 complex_perm_kinds_t kind;
803 /* Complex conjugates have the negation on the imaginary part of the
804 number where rotations affect the real component. So check if the
805 negation is on a dup of lane 1. */
806 if (fms)
808 /* Canonicalization for fms is not consistent. So have to test both
809 variants to be sure. This needs to be fixed in the mid-end so
810 this part can be simpler. */
811 kind = linear_loads_p (perm_cache, right_op[0]);
812 if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), PERM_ODDODD)
813 && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
814 PERM_ODDEVEN))
815 || (kind == PERM_ODDEVEN
816 && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
817 PERM_ODDODD))))
818 return false;
820 else
822 if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD
823 && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]),
824 PERM_ODDEVEN))
825 return false;
828 /* Deal with differences in indexes. */
829 int index1 = fms ? 1 : 0;
830 int index2 = fms ? 0 : 1;
832 /* Check if the conjugate is on the second first or second operand. The
833 order of the node with the conjugate value determines this, and the dup
834 node must be one of lane 0 of the same DR as the neg node. */
835 kind = linear_loads_p (perm_cache, left_op[index1]);
836 if (kind == PERM_TOP)
838 if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD)
839 return true;
841 else if (kind == PERM_EVENODD)
843 if ((kind = linear_loads_p (perm_cache, left_op[index2])) == PERM_EVENODD)
844 return false;
845 return true;
847 else if (!neg_first)
848 *conj_first_operand = true;
849 else
850 return false;
852 if (kind != PERM_EVENEVEN)
853 return false;
855 return true;
858 /* Helper function to help distinguish between a conjugate and a rotation in a
859 complex multiplication. The operations have similar shapes but the order of
860 the load permutes are different. This function returns TRUE when the order
861 is consistent with a multiplication or multiplication by conjugated
862 operand but returns FALSE if it's a multiplication by rotated operand. */
864 static inline bool
865 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
866 const vec<slp_tree> &op,
867 complex_perm_kinds_t permKind)
869 /* The left node is the more common case, test it first. */
870 if (!is_eq_or_top (linear_loads_p (perm_cache, op[0]), permKind))
872 if (!is_eq_or_top (linear_loads_p (perm_cache, op[1]), permKind))
873 return false;
875 return true;
878 /* This function combines two nodes containing only even and only odd lanes
879 together into a single node which contains the nodes in even/odd order
880 by using a lane permute.
882 The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
883 So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
885 The tree REPRESENTATION is taken from the supplied REP along with the
886 vectype which must be the same between all three nodes.
889 static slp_tree
890 vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
892 vec<std::pair<unsigned, unsigned> > perm;
893 perm.create (SLP_TREE_LANES (rep));
895 for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
897 perm.quick_push (std::make_pair (0, x));
898 perm.quick_push (std::make_pair (1, x+1));
901 slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
902 SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
903 SLP_TREE_LANE_PERMUTATION (vnode) = perm;
905 SLP_TREE_CHILDREN (vnode).create (2);
906 SLP_TREE_CHILDREN (vnode).quick_push (even);
907 SLP_TREE_CHILDREN (vnode).quick_push (odd);
908 SLP_TREE_REF_COUNT (even)++;
909 SLP_TREE_REF_COUNT (odd)++;
910 SLP_TREE_REF_COUNT (vnode) = 1;
912 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
913 gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
914 /* Representation is set to that of the current node as the vectorizer
915 can't deal with VEC_PERMs with no representation, as would be the
916 case with invariants. */
917 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
918 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
919 return vnode;
922 class complex_mul_pattern : public complex_pattern
924 protected:
925 complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
926 : complex_pattern (node, m_ops, ifn)
928 this->m_num_args = 2;
931 public:
932 void build (vec_info *);
933 static internal_fn
934 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
935 vec<slp_tree> *);
937 static vect_pattern*
938 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
940 static vect_pattern*
941 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
943 return new complex_mul_pattern (node, m_ops, ifn);
948 /* Pattern matcher for trying to match complex multiply pattern in SLP tree
949 If the operation matches then IFN is set to the operation it matched
950 and the arguments to the two replacement statements are put in m_ops.
952 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
954 This function matches the patterns shaped as:
956 double ax = (b[i+1] * a[i]);
957 double bx = (a[i+1] * b[i]);
959 c[i] = c[i] - ax;
960 c[i+1] = c[i+1] + bx;
962 If a match occurred then TRUE is returned, else FALSE. The initial match is
963 expected to be in OP1 and the initial match operands in args0. */
965 internal_fn
966 complex_mul_pattern::matches (complex_operation_t op,
967 slp_tree_to_load_perm_map_t *perm_cache,
968 slp_tree *node, vec<slp_tree> *ops)
970 internal_fn ifn = IFN_LAST;
972 if (op != MINUS_PLUS)
973 return IFN_LAST;
975 slp_tree root = *node;
976 /* First two nodes must be a multiply. */
977 auto_vec<slp_tree> muls;
978 if (vect_match_call_complex_mla (root, 0) != MULT_MULT
979 || vect_match_call_complex_mla (root, 1, &muls) != MULT_MULT)
980 return IFN_LAST;
982 /* Now operand2+4 may lead to another expression. */
983 auto_vec<slp_tree> left_op, right_op;
984 left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
985 right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
987 if (linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
988 return IFN_LAST;
990 bool neg_first = false;
991 bool conj_first_operand = false;
992 bool is_neg = vect_normalize_conj_loc (right_op, &neg_first);
994 if (!is_neg)
996 /* A multiplication needs to multiply agains the real pair, otherwise
997 the pattern matches that of FMS. */
998 if (!vect_validate_multiplication (perm_cache, left_op, PERM_EVENEVEN)
999 || vect_normalize_conj_loc (left_op))
1000 return IFN_LAST;
1001 ifn = IFN_COMPLEX_MUL;
1003 else if (is_neg)
1005 if (!vect_validate_multiplication (perm_cache, left_op, right_op,
1006 neg_first, &conj_first_operand,
1007 false))
1008 return IFN_LAST;
1010 ifn = IFN_COMPLEX_MUL_CONJ;
1013 if (!vect_pattern_validate_optab (ifn, *node))
1014 return IFN_LAST;
1016 ops->truncate (0);
1017 ops->create (3);
1019 complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
1020 if (kind == PERM_EVENODD)
1022 ops->quick_push (left_op[1]);
1023 ops->quick_push (right_op[1]);
1024 ops->quick_push (left_op[0]);
1026 else if (kind == PERM_TOP)
1028 ops->quick_push (left_op[1]);
1029 ops->quick_push (right_op[1]);
1030 ops->quick_push (left_op[0]);
1032 else if (kind == PERM_EVENEVEN && !conj_first_operand)
1034 ops->quick_push (left_op[0]);
1035 ops->quick_push (right_op[0]);
1036 ops->quick_push (left_op[1]);
1038 else
1040 ops->quick_push (left_op[0]);
1041 ops->quick_push (right_op[1]);
1042 ops->quick_push (left_op[1]);
1045 return ifn;
1048 /* Attempt to recognize a complex mul pattern. */
1050 vect_pattern*
1051 complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1052 slp_tree *node)
1054 auto_vec<slp_tree> ops;
1055 complex_operation_t op
1056 = vect_detect_pair_op (*node, true, &ops);
1057 internal_fn ifn
1058 = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1059 if (ifn == IFN_LAST)
1060 return NULL;
1062 return new complex_mul_pattern (node, &ops, ifn);
1065 /* Perform a replacement of the detected complex mul pattern with the new
1066 instruction sequences. */
1068 void
1069 complex_mul_pattern::build (vec_info *vinfo)
1071 slp_tree node;
1072 unsigned i;
1073 slp_tree newnode
1074 = vect_build_combine_node (this->m_ops[0], this->m_ops[1], *this->m_node);
1075 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1077 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1078 vect_free_slp_tree (node);
1080 /* First re-arrange the children. */
1081 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1082 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1083 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1085 /* And then rewrite the node itself. */
1086 complex_pattern::build (vinfo);
1089 /*******************************************************************************
1090 * complex_fma_pattern class
1091 ******************************************************************************/
1093 class complex_fma_pattern : public complex_pattern
1095 protected:
1096 complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1097 : complex_pattern (node, m_ops, ifn)
1099 this->m_num_args = 3;
1102 public:
1103 void build (vec_info *);
1104 static internal_fn
1105 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1106 vec<slp_tree> *);
1108 static vect_pattern*
1109 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1111 static vect_pattern*
1112 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1114 return new complex_fma_pattern (node, m_ops, ifn);
1118 /* Pattern matcher for trying to match complex multiply and accumulate
1119 and multiply and subtract patterns in SLP tree.
1120 If the operation matches then IFN is set to the operation it matched and
1121 the arguments to the two replacement statements are put in m_ops.
1123 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1125 This function matches the patterns shaped as:
1127 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1128 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1130 c[i] = c[i] - ax;
1131 c[i+1] = c[i+1] + bx;
1133 If a match occurred then TRUE is returned, else FALSE. The match is
1134 performed after COMPLEX_MUL which would have done the majority of the work.
1135 This function merely matches an ADD with a COMPLEX_MUL IFN. The initial
1136 match is expected to be in OP1 and the initial match operands in args0. */
1138 internal_fn
1139 complex_fma_pattern::matches (complex_operation_t op,
1140 slp_tree_to_load_perm_map_t * /* perm_cache */,
1141 slp_tree *ref_node, vec<slp_tree> *ops)
1143 internal_fn ifn = IFN_LAST;
1145 /* Find the two components. We match Complex MUL first which reduces the
1146 amount of work this pattern has to do. After that we just match the
1147 head node and we're done.:
1149 * FMA: + +.
1151 We need to ignore the two_operands nodes that may also match.
1152 For that we can check if they have any scalar statements and also
1153 check that it's not a permute node as we're looking for a normal
1154 PLUS_EXPR operation. */
1155 if (op != CMPLX_NONE)
1156 return IFN_LAST;
1158 /* Find the two components. We match Complex MUL first which reduces the
1159 amount of work this pattern has to do. After that we just match the
1160 head node and we're done.:
1162 * FMA: + + on a non-two_operands node. */
1163 slp_tree vnode = *ref_node;
1164 if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
1165 || !SLP_TREE_CHILDREN (vnode).exists ()
1166 || !vect_match_expression_p (vnode, PLUS_EXPR))
1167 return IFN_LAST;
1169 slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
1171 if (vect_match_call_p (node, IFN_COMPLEX_MUL))
1172 ifn = IFN_COMPLEX_FMA;
1173 else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
1174 ifn = IFN_COMPLEX_FMA_CONJ;
1175 else
1176 return IFN_LAST;
1178 if (!vect_pattern_validate_optab (ifn, vnode))
1179 return IFN_LAST;
1181 ops->truncate (0);
1182 ops->create (3);
1184 if (ifn == IFN_COMPLEX_FMA)
1186 ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
1187 ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
1188 ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
1190 else
1192 ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
1193 ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
1194 ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
1197 return ifn;
1200 /* Attempt to recognize a complex mul pattern. */
1202 vect_pattern*
1203 complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1204 slp_tree *node)
1206 auto_vec<slp_tree> ops;
1207 complex_operation_t op
1208 = vect_detect_pair_op (*node, true, &ops);
1209 internal_fn ifn
1210 = complex_fma_pattern::matches (op, perm_cache, node, &ops);
1211 if (ifn == IFN_LAST)
1212 return NULL;
1214 return new complex_fma_pattern (node, &ops, ifn);
1217 /* Perform a replacement of the detected complex mul pattern with the new
1218 instruction sequences. */
1220 void
1221 complex_fma_pattern::build (vec_info *vinfo)
1223 slp_tree node = SLP_TREE_CHILDREN (*this->m_node)[1];
1225 SLP_TREE_CHILDREN (*this->m_node).release ();
1226 SLP_TREE_CHILDREN (*this->m_node).create (3);
1227 SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
1229 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1230 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1232 vect_free_slp_tree (node);
1234 complex_pattern::build (vinfo);
1237 /*******************************************************************************
1238 * complex_fms_pattern class
1239 ******************************************************************************/
1241 class complex_fms_pattern : public complex_pattern
1243 protected:
1244 complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1245 : complex_pattern (node, m_ops, ifn)
1247 this->m_num_args = 3;
1250 public:
1251 void build (vec_info *);
1252 static internal_fn
1253 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1254 vec<slp_tree> *);
1256 static vect_pattern*
1257 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1259 static vect_pattern*
1260 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1262 return new complex_fms_pattern (node, m_ops, ifn);
1267 /* Pattern matcher for trying to match complex multiply and accumulate
1268 and multiply and subtract patterns in SLP tree.
1269 If the operation matches then IFN is set to the operation it matched and
1270 the arguments to the two replacement statements are put in m_ops.
1272 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1274 This function matches the patterns shaped as:
1276 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1277 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1279 c[i] = c[i] - ax;
1280 c[i+1] = c[i+1] + bx;
1282 If a match occurred then TRUE is returned, else FALSE. The initial match is
1283 expected to be in OP1 and the initial match operands in args0. */
1285 internal_fn
1286 complex_fms_pattern::matches (complex_operation_t op,
1287 slp_tree_to_load_perm_map_t *perm_cache,
1288 slp_tree * ref_node, vec<slp_tree> *ops)
1290 internal_fn ifn = IFN_LAST;
1292 /* Find the two components. We match Complex MUL first which reduces the
1293 amount of work this pattern has to do. After that we just match the
1294 head node and we're done.:
1296 * FMS: - +. */
1297 slp_tree child = NULL;
1299 /* We need to ignore the two_operands nodes that may also match,
1300 for that we can check if they have any scalar statements and also
1301 check that it's not a permute node as we're looking for a normal
1302 PLUS_EXPR operation. */
1303 if (op != PLUS_MINUS)
1304 return IFN_LAST;
1306 child = SLP_TREE_CHILDREN ((*ops)[1])[1];
1307 if (vect_detect_pair_op (child) != MINUS_PLUS)
1308 return IFN_LAST;
1310 /* First two nodes must be a multiply. */
1311 auto_vec<slp_tree> muls;
1312 if (vect_match_call_complex_mla (child, 0) != MULT_MULT
1313 || vect_match_call_complex_mla (child, 1, &muls) != MULT_MULT)
1314 return IFN_LAST;
1316 /* Now operand2+4 may lead to another expression. */
1317 auto_vec<slp_tree> left_op, right_op;
1318 left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
1319 right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
1321 bool is_neg = vect_normalize_conj_loc (left_op);
1323 child = SLP_TREE_CHILDREN ((*ops)[1])[0];
1324 bool conj_first_operand = false;
1325 if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
1326 &conj_first_operand, true))
1327 return IFN_LAST;
1329 if (!is_neg)
1330 ifn = IFN_COMPLEX_FMS;
1331 else if (is_neg)
1332 ifn = IFN_COMPLEX_FMS_CONJ;
1334 if (!vect_pattern_validate_optab (ifn, *ref_node))
1335 return IFN_LAST;
1337 ops->truncate (0);
1338 ops->create (4);
1340 complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
1341 if (kind == PERM_EVENODD)
1343 ops->quick_push (child);
1344 ops->quick_push (right_op[0]);
1345 ops->quick_push (right_op[1]);
1346 ops->quick_push (left_op[1]);
1348 else if (kind == PERM_TOP)
1350 ops->quick_push (child);
1351 ops->quick_push (right_op[1]);
1352 ops->quick_push (right_op[0]);
1353 ops->quick_push (left_op[0]);
1355 else if (kind == PERM_EVENEVEN && !is_neg)
1357 ops->quick_push (child);
1358 ops->quick_push (right_op[1]);
1359 ops->quick_push (right_op[0]);
1360 ops->quick_push (left_op[0]);
1362 else
1364 ops->quick_push (child);
1365 ops->quick_push (right_op[1]);
1366 ops->quick_push (right_op[0]);
1367 ops->quick_push (left_op[1]);
1370 return ifn;
1373 /* Attempt to recognize a complex mul pattern. */
1375 vect_pattern*
1376 complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1377 slp_tree *node)
1379 auto_vec<slp_tree> ops;
1380 complex_operation_t op
1381 = vect_detect_pair_op (*node, true, &ops);
1382 internal_fn ifn
1383 = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1384 if (ifn == IFN_LAST)
1385 return NULL;
1387 return new complex_fms_pattern (node, &ops, ifn);
1390 /* Perform a replacement of the detected complex mul pattern with the new
1391 instruction sequences. */
1393 void
1394 complex_fms_pattern::build (vec_info *vinfo)
1396 slp_tree node;
1397 unsigned i;
1398 slp_tree newnode =
1399 vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1400 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1401 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1403 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1404 vect_free_slp_tree (node);
1406 SLP_TREE_CHILDREN (*this->m_node).release ();
1407 SLP_TREE_CHILDREN (*this->m_node).create (3);
1409 /* First re-arrange the children. */
1410 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1411 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1412 SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1414 /* And then rewrite the node itself. */
1415 complex_pattern::build (vinfo);
1418 /*******************************************************************************
1419 * complex_operations_pattern class
1420 ******************************************************************************/
1422 /* This function combines all the existing pattern matchers above into one class
1423 that shares the functionality between them. The initial match is shared
1424 between all complex operations. */
1426 class complex_operations_pattern : public complex_pattern
1428 protected:
1429 complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1430 internal_fn ifn)
1431 : complex_pattern (node, m_ops, ifn)
1433 this->m_num_args = 0;
1436 public:
1437 void build (vec_info *);
1438 static internal_fn
1439 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1440 vec<slp_tree> *);
1442 static vect_pattern*
1443 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1446 /* Dummy matches implementation for proxy object. */
1448 internal_fn
1449 complex_operations_pattern::
1450 matches (complex_operation_t /* op */,
1451 slp_tree_to_load_perm_map_t * /* perm_cache */,
1452 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1454 return IFN_LAST;
1457 /* Attempt to recognize a complex mul pattern. */
1459 vect_pattern*
1460 complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1461 slp_tree *node)
1463 auto_vec<slp_tree> ops;
1464 complex_operation_t op
1465 = vect_detect_pair_op (*node, true, &ops);
1466 internal_fn ifn = IFN_LAST;
1468 ifn = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1469 if (ifn != IFN_LAST)
1470 return complex_fms_pattern::mkInstance (node, &ops, ifn);
1472 ifn = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1473 if (ifn != IFN_LAST)
1474 return complex_mul_pattern::mkInstance (node, &ops, ifn);
1476 ifn = complex_fma_pattern::matches (op, perm_cache, node, &ops);
1477 if (ifn != IFN_LAST)
1478 return complex_fma_pattern::mkInstance (node, &ops, ifn);
1480 ifn = complex_add_pattern::matches (op, perm_cache, node, &ops);
1481 if (ifn != IFN_LAST)
1482 return complex_add_pattern::mkInstance (node, &ops, ifn);
1484 return NULL;
1487 /* Dummy implementation of build. */
1489 void
1490 complex_operations_pattern::build (vec_info * /* vinfo */)
1492 gcc_unreachable ();
1496 /* The addsub_pattern. */
1498 class addsub_pattern : public vect_pattern
1500 public:
1501 addsub_pattern (slp_tree *node, internal_fn ifn)
1502 : vect_pattern (node, NULL, ifn) {};
1504 void build (vec_info *);
1506 static vect_pattern*
1507 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1510 vect_pattern *
1511 addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_)
1513 slp_tree node = *node_;
1514 if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
1515 || SLP_TREE_CHILDREN (node).length () != 2
1516 || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
1517 return NULL;
1519 /* Match a blend of a plus and a minus op with the same number of plus and
1520 minus lanes on the same operands. */
1521 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1522 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1523 if (l0 == l1)
1524 return NULL;
1525 bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1526 PLUS_EXPR);
1527 if (!l0add_p
1528 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1529 return NULL;
1530 bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1531 PLUS_EXPR);
1532 if (!l1add_p
1533 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
1534 return NULL;
1536 slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1537 slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1538 if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1539 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1540 || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1541 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
1542 return NULL;
1544 for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1546 std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
1547 /* It has to be alternating -, +, -,
1548 While we could permute the .ADDSUB inputs and the .ADDSUB output
1549 that's only profitable over the add + sub + blend if at least
1550 one of the permute is optimized which we can't determine here. */
1551 if (perm.first != ((i & 1) ? l1 : l0)
1552 || perm.second != i)
1553 return NULL;
1556 /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1557 (l0add_p), see whether we have FMA variants. */
1558 if (!l0add_p
1559 && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
1561 /* (c * d) -+ a */
1562 if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1563 return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1565 else if (l0add_p
1566 && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
1568 /* (c * d) +- a */
1569 if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1570 return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1573 if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1574 return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1576 return NULL;
1579 void
1580 addsub_pattern::build (vec_info *vinfo)
1582 slp_tree node = *m_node;
1584 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1585 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1587 switch (m_ifn)
1589 case IFN_VEC_ADDSUB:
1591 slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1592 slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1594 /* Modify the blend node in-place. */
1595 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1596 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1597 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1598 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1600 /* Build IFN_VEC_ADDSUB from the sub representative operands. */
1601 stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1602 gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1603 gimple_assign_rhs1 (rep->stmt),
1604 gimple_assign_rhs2 (rep->stmt));
1605 gimple_call_set_lhs (call, make_ssa_name
1606 (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1607 gimple_call_set_nothrow (call, true);
1608 gimple_set_bb (call, gimple_bb (rep->stmt));
1609 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
1610 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1611 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1612 STMT_SLP_TYPE (new_rep) = pure_slp;
1613 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1614 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1615 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
1616 SLP_TREE_CODE (node) = ERROR_MARK;
1617 SLP_TREE_LANE_PERMUTATION (node).release ();
1619 vect_free_slp_tree (sub);
1620 vect_free_slp_tree (add);
1621 break;
1623 case IFN_VEC_FMADDSUB:
1624 case IFN_VEC_FMSUBADD:
1626 slp_tree sub, add;
1627 if (m_ifn == IFN_VEC_FMADDSUB)
1629 sub = SLP_TREE_CHILDREN (node)[l0];
1630 add = SLP_TREE_CHILDREN (node)[l1];
1632 else /* m_ifn == IFN_VEC_FMSUBADD */
1634 sub = SLP_TREE_CHILDREN (node)[l1];
1635 add = SLP_TREE_CHILDREN (node)[l0];
1637 slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1638 /* Modify the blend node in-place. */
1639 SLP_TREE_CHILDREN (node).safe_grow (3, true);
1640 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1641 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1642 SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1643 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1644 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1645 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1647 /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */
1648 stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1649 stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1650 gcall *call = gimple_build_call_internal (m_ifn, 3,
1651 gimple_assign_rhs1 (mrep->stmt),
1652 gimple_assign_rhs2 (mrep->stmt),
1653 gimple_assign_rhs2 (srep->stmt));
1654 gimple_call_set_lhs (call, make_ssa_name
1655 (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
1656 gimple_call_set_nothrow (call, true);
1657 gimple_set_bb (call, gimple_bb (srep->stmt));
1658 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
1659 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1660 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1661 STMT_SLP_TYPE (new_rep) = pure_slp;
1662 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1663 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1664 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
1665 SLP_TREE_CODE (node) = ERROR_MARK;
1666 SLP_TREE_LANE_PERMUTATION (node).release ();
1668 vect_free_slp_tree (sub);
1669 vect_free_slp_tree (add);
1670 break;
1672 default:;
1676 /*******************************************************************************
1677 * Pattern matching definitions
1678 ******************************************************************************/
1680 #define SLP_PATTERN(x) &x::recognize
1681 vect_pattern_decl_t slp_patterns[]
1683 /* For least amount of back-tracking and more efficient matching
1684 order patterns from the largest to the smallest. Especially if they
1685 overlap in what they can detect. */
1687 SLP_PATTERN (complex_operations_pattern),
1688 SLP_PATTERN (addsub_pattern)
1690 #undef SLP_PATTERN
1692 /* Set the number of SLP pattern matchers available. */
1693 size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t);