RISC-V: Make stack_save_restore tests more robust
[official-gcc.git] / gcc / tree-vect-slp-patterns.cc
blob8bd4da258d15bba3f7b7ffe8bb829dc2638ca1db
1 /* SLP - Pattern matcher on SLP trees
2 Copyright (C) 2020-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "gimple-iterator.h"
36 #include "cfgloop.h"
37 #include "tree-vectorizer.h"
38 #include "langhooks.h"
39 #include "gimple-walk.h"
40 #include "dbgcnt.h"
41 #include "tree-vector-builder.h"
42 #include "vec-perm-indices.h"
43 #include "gimple-fold.h"
44 #include "internal-fn.h"
46 /* SLP Pattern matching mechanism.
48 This extension to the SLP vectorizer allows one to transform the generated SLP
49 tree based on any pattern. The difference between this and the normal vect
50 pattern matcher is that unlike the former, this matcher allows you to match
51 with instructions that do not belong to the same SSA dominator graph.
53 The only requirement that this pattern matcher has is that you are only
54 only allowed to either match an entire group or none.
56 The pattern matcher currently only allows you to perform replacements to
57 internal functions.
59 Once the patterns are matched it is one way, these cannot be undone. It is
60 currently not supported to match patterns recursively.
62 To add a new pattern, implement the vect_pattern class and add the type to
63 slp_patterns.
67 /*******************************************************************************
68 * vect_pattern class
69 ******************************************************************************/
71 /* Default implementation of recognize that performs matching, validation and
72 replacement of nodes but that can be overriden if required. */
74 static bool
75 vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
77 tree vectype = SLP_TREE_VECTYPE (node);
78 if (ifn == IFN_LAST || !vectype)
79 return false;
81 if (dump_enabled_p ())
82 dump_printf_loc (MSG_NOTE, vect_location,
83 "Found %s pattern in SLP tree\n",
84 internal_fn_name (ifn));
86 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_NOTE, vect_location,
90 "Target supports %s vectorization with mode %T\n",
91 internal_fn_name (ifn), vectype);
93 else
95 if (dump_enabled_p ())
97 if (!vectype)
98 dump_printf_loc (MSG_NOTE, vect_location,
99 "Target does not support vector type for %G\n",
100 STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
101 else
102 dump_printf_loc (MSG_NOTE, vect_location,
103 "Target does not support %s for vector type "
104 "%T\n", internal_fn_name (ifn), vectype);
106 return false;
108 return true;
111 /*******************************************************************************
112 * General helper types
113 ******************************************************************************/
115 /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116 be matched when looking for expressions that we are interested matching for
117 complex numbers addition and mla. */
119 typedef enum _complex_operation : unsigned {
120 PLUS_PLUS,
121 MINUS_PLUS,
122 PLUS_MINUS,
123 MULT_MULT,
124 CMPLX_NONE
125 } complex_operation_t;
127 /*******************************************************************************
128 * General helper functions
129 ******************************************************************************/
131 /* Helper function of linear_loads_p that checks to see if the load permutation
132 is sequential and in monotonically increasing order of loads with no gaps.
135 static inline complex_perm_kinds_t
136 is_linear_load_p (load_permutation_t loads)
138 if (loads.length() == 0)
139 return PERM_UNKNOWN;
141 unsigned load, i;
142 complex_perm_kinds_t candidates[4]
143 = { PERM_ODDODD
144 , PERM_EVENEVEN
145 , PERM_EVENODD
146 , PERM_ODDEVEN
149 int valid_patterns = 4;
150 FOR_EACH_VEC_ELT (loads, i, load)
152 unsigned adj_load = load % 2;
153 if (candidates[0] != PERM_UNKNOWN && adj_load != 1)
155 candidates[0] = PERM_UNKNOWN;
156 valid_patterns--;
158 if (candidates[1] != PERM_UNKNOWN && adj_load != 0)
160 candidates[1] = PERM_UNKNOWN;
161 valid_patterns--;
163 if (candidates[2] != PERM_UNKNOWN && load != i)
165 candidates[2] = PERM_UNKNOWN;
166 valid_patterns--;
168 if (candidates[3] != PERM_UNKNOWN
169 && load != (i % 2 == 0 ? i + 1 : i - 1))
171 candidates[3] = PERM_UNKNOWN;
172 valid_patterns--;
175 if (valid_patterns == 0)
176 return PERM_UNKNOWN;
179 for (i = 0; i < sizeof(candidates); i++)
180 if (candidates[i] != PERM_UNKNOWN)
181 return candidates[i];
183 return PERM_UNKNOWN;
186 /* Combine complex_perm_kinds A and B into a new permute kind that describes the
187 resulting operation. */
189 static inline complex_perm_kinds_t
190 vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
192 if (a == b)
193 return a;
195 if (a == PERM_TOP)
196 return b;
198 if (b == PERM_TOP)
199 return a;
201 return PERM_UNKNOWN;
204 /* Check to see if all loads rooted in ROOT are linear. Linearity is
205 defined as having no gaps between values loaded. */
207 static complex_perm_kinds_t
208 linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
210 if (!root)
211 return PERM_UNKNOWN;
213 unsigned i;
214 complex_perm_kinds_t *tmp;
216 if ((tmp = perm_cache->get (root)) != NULL)
217 return *tmp;
219 complex_perm_kinds_t retval = PERM_UNKNOWN;
220 perm_cache->put (root, retval);
222 /* If it's a load node, then just read the load permute. */
223 if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
225 retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
226 perm_cache->put (root, retval);
227 return retval;
229 else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
231 retval = PERM_TOP;
232 perm_cache->put (root, retval);
233 return retval;
236 complex_perm_kinds_t kind = PERM_TOP;
238 slp_tree child;
239 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
241 complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
242 kind = vect_merge_perms (kind, res);
243 /* Unknown and Top are not valid on blends as they produce no permute. */
244 retval = kind;
245 if (kind == PERM_UNKNOWN || kind == PERM_TOP)
246 return retval;
249 retval = kind;
251 perm_cache->put (root, retval);
252 return retval;
256 /* This function attempts to make a node rooted in NODE is linear. If the node
257 if already linear than the node itself is returned in RESULT.
259 If the node is not linear then a new VEC_PERM_EXPR node is created with a
260 lane permute that when applied will make the node linear. If such a
261 permute cannot be created then FALSE is returned from the function.
263 Here linearity is defined as having a sequential, monotically increasing
264 load position inside the load permute generated by the loads reachable from
265 NODE. */
267 static slp_tree
268 vect_build_swap_evenodd_node (slp_tree node)
270 /* Attempt to linearise the permute. */
271 vec<std::pair<unsigned, unsigned> > zipped;
272 zipped.create (SLP_TREE_LANES (node));
274 for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
276 zipped.quick_push (std::make_pair (0, x+1));
277 zipped.quick_push (std::make_pair (0, x));
280 /* Create the new permute node and store it instead. */
281 slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
282 SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
283 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
284 SLP_TREE_CHILDREN (vnode).quick_push (node);
285 SLP_TREE_REF_COUNT (vnode) = 1;
286 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
287 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
288 SLP_TREE_REF_COUNT (node)++;
289 return vnode;
292 /* Checks to see of the expression represented by NODE is a gimple assign with
293 code CODE. */
295 static inline bool
296 vect_match_expression_p (slp_tree node, tree_code code)
298 if (!node
299 || !SLP_TREE_REPRESENTATIVE (node))
300 return false;
302 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
303 if (!is_gimple_assign (expr)
304 || gimple_assign_rhs_code (expr) != code)
305 return false;
307 return true;
310 /* Check if the given lane permute in PERMUTES matches an alternating sequence
311 of {even odd even odd ...}. This to account for unrolled loops. Further
312 mode there resulting permute must be linear. */
314 static inline bool
315 vect_check_evenodd_blend (lane_permutation_t &permutes,
316 unsigned even, unsigned odd)
318 if (permutes.length () == 0
319 || permutes.length () % 2 != 0)
320 return false;
322 unsigned val[2] = {even, odd};
323 unsigned seed = 0;
324 for (unsigned i = 0; i < permutes.length (); i++)
325 if (permutes[i].first != val[i % 2]
326 || permutes[i].second != seed++)
327 return false;
329 return true;
332 /* This function will match the two gimple expressions representing NODE1 and
333 NODE2 in parallel and returns the pair operation that represents the two
334 expressions in the two statements.
336 If match is successful then the corresponding complex_operation is
337 returned and the arguments to the two matched operations are returned in OPS.
339 If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
340 from the two nodes alternatingly.
342 If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
344 e.g. the following gimple statements
346 stmt 0 _39 = _37 + _12;
347 stmt 1 _6 = _38 - _36;
349 will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
352 static complex_operation_t
353 vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
354 bool two_operands = true, vec<slp_tree> *ops = NULL)
356 complex_operation_t result = CMPLX_NONE;
358 if (vect_match_expression_p (node1, MINUS_EXPR)
359 && vect_match_expression_p (node2, PLUS_EXPR)
360 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
361 result = MINUS_PLUS;
362 else if (vect_match_expression_p (node1, PLUS_EXPR)
363 && vect_match_expression_p (node2, MINUS_EXPR)
364 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
365 result = PLUS_MINUS;
366 else if (vect_match_expression_p (node1, PLUS_EXPR)
367 && vect_match_expression_p (node2, PLUS_EXPR))
368 result = PLUS_PLUS;
369 else if (vect_match_expression_p (node1, MULT_EXPR)
370 && vect_match_expression_p (node2, MULT_EXPR))
371 result = MULT_MULT;
373 if (result != CMPLX_NONE && ops != NULL)
375 if (two_operands)
377 auto l0node = SLP_TREE_CHILDREN (node1);
378 auto l1node = SLP_TREE_CHILDREN (node2);
380 /* Check if the tree is connected as we expect it. */
381 if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
382 || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
383 return CMPLX_NONE;
385 ops->safe_push (node1);
386 ops->safe_push (node2);
388 return result;
391 /* Overload of vect_detect_pair_op that matches against the representative
392 statements in the children of NODE. It is expected that NODE has exactly
393 two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
395 static complex_operation_t
396 vect_detect_pair_op (slp_tree node, bool two_operands = true,
397 vec<slp_tree> *ops = NULL)
399 if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
400 return CMPLX_NONE;
402 if (SLP_TREE_CHILDREN (node).length () != 2)
403 return CMPLX_NONE;
405 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
406 lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
408 return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
409 ops);
412 /*******************************************************************************
413 * complex_pattern class
414 ******************************************************************************/
416 /* SLP Complex Numbers pattern matching.
418 As an example, the following simple loop:
420 double a[restrict N]; double b[restrict N]; double c[restrict N];
422 for (int i=0; i < N; i+=2)
424 c[i] = a[i] - b[i+1];
425 c[i+1] = a[i+1] + b[i];
428 which represents a complex addition on with a rotation of 90* around the
429 argand plane. i.e. if `a` and `b` were complex numbers then this would be the
430 same as `a + (b * I)`.
432 Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
433 both recognized in order for the pattern to work. As an SLP tree this is
434 represented as
436 +--------------------------------+
437 | stmt 0 *_9 = _10; |
438 | stmt 1 *_15 = _16; |
439 +--------------------------------+
443 +--------------------------------+
444 | stmt 0 _10 = _4 - _8; |
445 | stmt 1 _16 = _12 + _14; |
446 | lane permutation { 0[0] 1[1] } |
447 +--------------------------------+
451 +-----+ | | +-----+
452 | | | | | |
453 +-----| { } |<-----+ +----->| { } --------+
454 | | | +------------------| | |
455 | +-----+ | +-----+ |
456 | | | |
457 | | | |
458 | +------|------------------+ |
459 | | | |
460 v v v v
461 +--------------------------+ +--------------------------------+
462 | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
463 | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
464 | load permutation { 1 0 } | | load permutation { 0 1 } |
465 +--------------------------+ +--------------------------------+
467 The pattern matcher allows you to replace both statements 0 and 1 or none at
468 all. Because this operation is a two operands operation the actual nodes
469 being replaced are those in the { } nodes. The actual scalar statements
470 themselves are not replaced or used during the matching but instead the
471 SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
472 replace and match on any number of nodes.
474 Because the pattern matcher matches on the representative statement for the
475 SLP node the case of two_operators it allows you to match the children of the
476 node. This is done using the method `recognize ()`.
480 /* The complex_pattern class contains common code for pattern matchers that work
481 on complex numbers. These provide functionality to allow de-construction and
482 validation of sequences depicting/transforming REAL and IMAG pairs. */
484 class complex_pattern : public vect_pattern
486 protected:
487 auto_vec<slp_tree> m_workset;
488 complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
489 : vect_pattern (node, m_ops, ifn)
491 this->m_workset.safe_push (*node);
494 public:
495 void build (vec_info *) override;
497 static internal_fn
498 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
499 vec<slp_tree> *);
502 /* Create a replacement pattern statement for each node in m_node and inserts
503 the new statement into m_node as the new representative statement. The old
504 statement is marked as being in a pattern defined by the new statement. The
505 statement is created as call to internal function IFN with m_num_args
506 arguments.
508 Futhermore the new pattern is also added to the vectorization information
509 structure VINFO and the old statement STMT_INFO is marked as unused while
510 the new statement is marked as used and the number of SLP uses of the new
511 statement is incremented.
513 The newly created SLP nodes are marked as SLP only and will be dissolved
514 if SLP is aborted.
516 The newly created gimple call is returned and the BB remains unchanged.
518 This default method is designed to only match against simple operands where
519 all the input and output types are the same.
522 void
523 complex_pattern::build (vec_info *vinfo)
525 stmt_vec_info stmt_info;
527 auto_vec<tree> args;
528 args.create (this->m_num_args);
529 args.quick_grow_cleared (this->m_num_args);
530 slp_tree node;
531 unsigned ix;
532 stmt_vec_info call_stmt_info;
533 gcall *call_stmt = NULL;
535 /* Now modify the nodes themselves. */
536 FOR_EACH_VEC_ELT (this->m_workset, ix, node)
538 /* Calculate the location of the statement in NODE to replace. */
539 stmt_info = SLP_TREE_REPRESENTATIVE (node);
540 stmt_vec_info reduc_def
541 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
542 gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
543 tree lhs_old_stmt = gimple_get_lhs (old_stmt);
544 tree type = TREE_TYPE (lhs_old_stmt);
546 /* Create the argument set for use by gimple_build_call_internal_vec. */
547 for (unsigned i = 0; i < this->m_num_args; i++)
548 args[i] = lhs_old_stmt;
550 /* Create the new pattern statements. */
551 call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
552 tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
553 gimple_call_set_lhs (call_stmt, var);
554 gimple_set_location (call_stmt, gimple_location (old_stmt));
555 gimple_call_set_nothrow (call_stmt, true);
557 /* Adjust the book-keeping for the new and old statements for use during
558 SLP. This is required to get the right VF and statement during SLP
559 analysis. These changes are created after relevancy has been set for
560 the nodes as such we need to manually update them. Any changes will be
561 undone if SLP is cancelled. */
562 call_stmt_info
563 = vinfo->add_pattern_stmt (call_stmt, stmt_info);
565 /* Make sure to mark the representative statement pure_slp and
566 relevant and transfer reduction info. */
567 STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
568 STMT_SLP_TYPE (call_stmt_info) = pure_slp;
569 STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
571 gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
572 STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
573 STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
575 /* Since we are replacing all the statements in the group with the same
576 thing it doesn't really matter. So just set it every time a new stmt
577 is created. */
578 SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
579 SLP_TREE_LANE_PERMUTATION (node).release ();
580 SLP_TREE_CODE (node) = CALL_EXPR;
584 /*******************************************************************************
585 * complex_add_pattern class
586 ******************************************************************************/
588 class complex_add_pattern : public complex_pattern
590 protected:
591 complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
592 : complex_pattern (node, m_ops, ifn)
594 this->m_num_args = 2;
597 public:
598 void build (vec_info *) final override;
599 static internal_fn
600 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
601 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
603 static vect_pattern*
604 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
605 slp_tree *);
607 static vect_pattern*
608 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
610 return new complex_add_pattern (node, m_ops, ifn);
614 /* Perform a replacement of the detected complex add pattern with the new
615 instruction sequences. */
617 void
618 complex_add_pattern::build (vec_info *vinfo)
620 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
622 slp_tree node = this->m_ops[0];
623 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
625 /* First re-arrange the children. */
626 SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
627 SLP_TREE_CHILDREN (*this->m_node)[1] =
628 vect_build_swap_evenodd_node (children[1]);
630 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
631 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
632 vect_free_slp_tree (this->m_ops[0]);
633 vect_free_slp_tree (this->m_ops[1]);
635 complex_pattern::build (vinfo);
638 /* Pattern matcher for trying to match complex addition pattern in SLP tree.
640 If no match is found then IFN is set to IFN_LAST.
641 This function matches the patterns shaped as:
643 c[i] = a[i] - b[i+1];
644 c[i+1] = a[i+1] + b[i];
646 If a match occurred then TRUE is returned, else FALSE. The initial match is
647 expected to be in OP1 and the initial match operands in args0. */
649 internal_fn
650 complex_add_pattern::matches (complex_operation_t op,
651 slp_tree_to_load_perm_map_t *perm_cache,
652 slp_compat_nodes_map_t * /* compat_cache */,
653 slp_tree *node, vec<slp_tree> *ops)
655 internal_fn ifn = IFN_LAST;
657 /* Find the two components. Rotation in the complex plane will modify
658 the operations:
660 * Rotation 0: + +
661 * Rotation 90: - +
662 * Rotation 180: - -
663 * Rotation 270: + -
665 Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
666 to care about them here. */
667 if (op == MINUS_PLUS)
668 ifn = IFN_COMPLEX_ADD_ROT90;
669 else if (op == PLUS_MINUS)
670 ifn = IFN_COMPLEX_ADD_ROT270;
671 else
672 return ifn;
674 /* verify that there is a permute, otherwise this isn't a pattern we
675 we support. */
676 gcc_assert (ops->length () == 2);
678 vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
680 /* First node must be unpermuted. */
681 if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
682 return IFN_LAST;
684 /* Second node must be permuted. */
685 if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
686 return IFN_LAST;
688 if (!vect_pattern_validate_optab (ifn, *node))
689 return IFN_LAST;
691 return ifn;
694 /* Attempt to recognize a complex add pattern. */
696 vect_pattern*
697 complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
698 slp_compat_nodes_map_t *compat_cache,
699 slp_tree *node)
701 auto_vec<slp_tree> ops;
702 complex_operation_t op
703 = vect_detect_pair_op (*node, true, &ops);
704 internal_fn ifn
705 = complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops);
706 if (ifn == IFN_LAST)
707 return NULL;
709 return new complex_add_pattern (node, &ops, ifn);
712 /*******************************************************************************
713 * complex_mul_pattern
714 ******************************************************************************/
716 /* Helper function to check if PERM is KIND or PERM_TOP. */
718 static inline bool
719 is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache,
720 slp_tree op1, complex_perm_kinds_t kind1,
721 slp_tree op2, complex_perm_kinds_t kind2)
723 complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1);
724 if (perm1 != kind1 && perm1 != PERM_TOP)
725 return false;
727 complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2);
728 if (perm2 != kind2 && perm2 != PERM_TOP)
729 return false;
731 return true;
734 enum _conj_status { CONJ_NONE, CONJ_FST, CONJ_SND };
736 static inline bool
737 compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
738 slp_tree a, int *pa, slp_tree b, int *pb)
740 bool *tmp;
741 std::pair<slp_tree, slp_tree> key = std::make_pair(a, b);
742 if ((tmp = compat_cache->get (key)) != NULL)
743 return *tmp;
745 compat_cache->put (key, false);
747 if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ())
748 return false;
750 if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b))
751 return false;
753 /* Only internal nodes can be loads, as such we can't check further if they
754 are externals. */
755 if (SLP_TREE_DEF_TYPE (a) != vect_internal_def)
757 for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++)
759 tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]];
760 tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]];
761 if (!operand_equal_p (op1, op2, 0))
762 return false;
765 compat_cache->put (key, true);
766 return true;
769 auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a));
770 auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b));
772 if (gimple_code (a_stmt) != gimple_code (b_stmt))
773 return false;
775 /* code, children, type, externals, loads, constants */
776 if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt))
777 return false;
779 /* At this point, a and b are known to be the same gimple operations. */
780 if (is_gimple_call (a_stmt))
782 if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
783 dyn_cast <gcall *> (b_stmt)))
784 return false;
786 else if (!is_gimple_assign (a_stmt))
787 return false;
788 else
790 tree_code acode = gimple_assign_rhs_code (a_stmt);
791 tree_code bcode = gimple_assign_rhs_code (b_stmt);
792 if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR)
793 && (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR))
794 return true;
796 if (acode != bcode)
797 return false;
800 if (!SLP_TREE_LOAD_PERMUTATION (a).exists ()
801 || !SLP_TREE_LOAD_PERMUTATION (b).exists ())
803 for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
805 tree t1 = gimple_arg (a_stmt, i);
806 tree t2 = gimple_arg (b_stmt, i);
807 if (TREE_CODE (t1) != TREE_CODE (t2))
808 return false;
810 /* If SSA name then we will need to inspect the children
811 so we can punt here. */
812 if (TREE_CODE (t1) == SSA_NAME)
813 continue;
815 if (!operand_equal_p (t1, t2, 0))
816 return false;
819 else
821 auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a));
822 auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b));
823 /* Don't check the last dimension as that's checked by the lineary
824 checks. This check is also much stricter than what we need
825 because it doesn't consider loading from adjacent elements
826 in the same struct as loading from the same base object.
827 But for now, I'll play it safe. */
828 if (!same_data_refs (dr1, dr2, 1))
829 return false;
832 for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++)
834 if (!compatible_complex_nodes_p (compat_cache,
835 SLP_TREE_CHILDREN (a)[i], pa,
836 SLP_TREE_CHILDREN (b)[i], pb))
837 return false;
840 compat_cache->put (key, true);
841 return true;
844 static inline bool
845 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
846 slp_compat_nodes_map_t *compat_cache,
847 vec<slp_tree> &left_op,
848 vec<slp_tree> &right_op,
849 bool subtract,
850 enum _conj_status *_status)
852 auto_vec<slp_tree> ops;
853 enum _conj_status stats = CONJ_NONE;
855 /* The complex operations can occur in two layouts and two permute sequences
856 so declare them and re-use them. */
857 int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}. */
858 , { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}. */
861 /* Now for the corresponding permutes that go with these values. */
862 complex_perm_kinds_t perms[][4]
863 = { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN }
864 , { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD }
867 /* These permutes are used during comparisons of externals on which
868 we require strict equality. */
869 int cq[][4][2]
870 = { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } }
871 , { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } }
874 /* Default to style and perm 0, most operations use this one. */
875 int style = 0;
876 int perm = subtract ? 1 : 0;
878 /* Check if we have a negate operation, if so absorb the node and continue
879 looking. */
880 bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR);
881 bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR);
883 /* Determine which style we're looking at. We only have different ones
884 whenever a conjugate is involved. */
885 if (neg0 && neg1)
887 else if (neg0)
889 right_op[0] = SLP_TREE_CHILDREN (right_op[0])[0];
890 stats = CONJ_FST;
891 if (subtract)
892 perm = 0;
894 else if (neg1)
896 right_op[1] = SLP_TREE_CHILDREN (right_op[1])[0];
897 stats = CONJ_SND;
898 perm = 1;
901 *_status = stats;
903 /* Flatten the inputs after we've remapped them. */
904 ops.create (4);
905 ops.safe_splice (left_op);
906 ops.safe_splice (right_op);
908 /* Extract out the elements to check. */
909 slp_tree op0 = ops[styles[style][0]];
910 slp_tree op1 = ops[styles[style][1]];
911 slp_tree op2 = ops[styles[style][2]];
912 slp_tree op3 = ops[styles[style][3]];
914 /* Do cheapest test first. If failed no need to analyze further. */
915 if (linear_loads_p (perm_cache, op0) != perms[perm][0]
916 || linear_loads_p (perm_cache, op1) != perms[perm][1]
917 || !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3]))
918 return false;
920 return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1,
921 cq[perm][1])
922 && compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3,
923 cq[perm][3]);
926 /* This function combines two nodes containing only even and only odd lanes
927 together into a single node which contains the nodes in even/odd order
928 by using a lane permute.
930 The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
931 So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
933 The tree REPRESENTATION is taken from the supplied REP along with the
934 vectype which must be the same between all three nodes.
937 static slp_tree
938 vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
940 vec<std::pair<unsigned, unsigned> > perm;
941 perm.create (SLP_TREE_LANES (rep));
943 for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
945 perm.quick_push (std::make_pair (0, x));
946 perm.quick_push (std::make_pair (1, x+1));
949 slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
950 SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
951 SLP_TREE_LANE_PERMUTATION (vnode) = perm;
953 SLP_TREE_CHILDREN (vnode).create (2);
954 SLP_TREE_CHILDREN (vnode).quick_push (even);
955 SLP_TREE_CHILDREN (vnode).quick_push (odd);
956 SLP_TREE_REF_COUNT (even)++;
957 SLP_TREE_REF_COUNT (odd)++;
958 SLP_TREE_REF_COUNT (vnode) = 1;
960 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
961 gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
962 /* Representation is set to that of the current node as the vectorizer
963 can't deal with VEC_PERMs with no representation, as would be the
964 case with invariants. */
965 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
966 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
967 return vnode;
970 class complex_mul_pattern : public complex_pattern
972 protected:
973 complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
974 : complex_pattern (node, m_ops, ifn)
976 this->m_num_args = 2;
979 public:
980 void build (vec_info *) final override;
981 static internal_fn
982 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
983 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
985 static vect_pattern*
986 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
987 slp_tree *);
989 static vect_pattern*
990 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
992 return new complex_mul_pattern (node, m_ops, ifn);
997 /* Pattern matcher for trying to match complex multiply and complex multiply
998 and accumulate pattern in SLP tree. If the operation matches then IFN
999 is set to the operation it matched and the arguments to the two
1000 replacement statements are put in m_ops.
1002 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1004 This function matches the patterns shaped as:
1006 double ax = (b[i+1] * a[i]);
1007 double bx = (a[i+1] * b[i]);
1009 c[i] = c[i] - ax;
1010 c[i+1] = c[i+1] + bx;
1012 If a match occurred then TRUE is returned, else FALSE. The initial match is
1013 expected to be in OP1 and the initial match operands in args0. */
1015 internal_fn
1016 complex_mul_pattern::matches (complex_operation_t op,
1017 slp_tree_to_load_perm_map_t *perm_cache,
1018 slp_compat_nodes_map_t *compat_cache,
1019 slp_tree *node, vec<slp_tree> *ops)
1021 internal_fn ifn = IFN_LAST;
1023 if (op != MINUS_PLUS)
1024 return IFN_LAST;
1026 auto childs = *ops;
1027 auto l0node = SLP_TREE_CHILDREN (childs[0]);
1029 bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
1030 bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
1031 if (!mul0 && !mul1)
1032 return IFN_LAST;
1034 /* Now operand2+4 may lead to another expression. */
1035 auto_vec<slp_tree> left_op, right_op;
1036 slp_tree add0 = NULL;
1038 /* Check if we may be a multiply add. It's only valid to form FMAs
1039 with -ffp-contract=fast. */
1040 if (!mul0
1041 && (flag_fp_contract_mode == FP_CONTRACT_FAST
1042 || !FLOAT_TYPE_P (SLP_TREE_VECTYPE (*node)))
1043 && vect_match_expression_p (l0node[0], PLUS_EXPR))
1045 auto vals = SLP_TREE_CHILDREN (l0node[0]);
1046 /* Check if it's a multiply, otherwise no idea what this is. */
1047 if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
1048 return IFN_LAST;
1050 /* Check if the ADD is linear, otherwise it's not valid complex FMA. */
1051 if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
1052 return IFN_LAST;
1054 left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
1055 add0 = vals[0];
1057 else
1058 left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
1060 right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1062 if (left_op.length () != 2
1063 || right_op.length () != 2
1064 || !mul0
1065 || !mul1
1066 || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
1067 return IFN_LAST;
1069 enum _conj_status status;
1070 if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
1071 right_op, false, &status))
1072 return IFN_LAST;
1074 if (status == CONJ_NONE)
1076 if (add0)
1077 ifn = IFN_COMPLEX_FMA;
1078 else
1079 ifn = IFN_COMPLEX_MUL;
1081 else
1083 if(add0)
1084 ifn = IFN_COMPLEX_FMA_CONJ;
1085 else
1086 ifn = IFN_COMPLEX_MUL_CONJ;
1089 if (!vect_pattern_validate_optab (ifn, *node))
1090 return IFN_LAST;
1092 ops->truncate (0);
1093 ops->create (add0 ? 4 : 3);
1095 if (add0)
1096 ops->quick_push (add0);
1098 complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
1099 if (kind == PERM_EVENODD || kind == PERM_TOP)
1101 ops->quick_push (left_op[1]);
1102 ops->quick_push (right_op[1]);
1103 ops->quick_push (left_op[0]);
1105 else if (kind == PERM_EVENEVEN && status != CONJ_SND)
1107 ops->quick_push (left_op[0]);
1108 ops->quick_push (right_op[0]);
1109 ops->quick_push (left_op[1]);
1111 else
1113 ops->quick_push (left_op[0]);
1114 ops->quick_push (right_op[1]);
1115 ops->quick_push (left_op[1]);
1118 return ifn;
1121 /* Attempt to recognize a complex mul pattern. */
1123 vect_pattern*
1124 complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1125 slp_compat_nodes_map_t *compat_cache,
1126 slp_tree *node)
1128 auto_vec<slp_tree> ops;
1129 complex_operation_t op
1130 = vect_detect_pair_op (*node, true, &ops);
1131 internal_fn ifn
1132 = complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1133 if (ifn == IFN_LAST)
1134 return NULL;
1136 return new complex_mul_pattern (node, &ops, ifn);
1139 /* Perform a replacement of the detected complex mul pattern with the new
1140 instruction sequences. */
1142 void
1143 complex_mul_pattern::build (vec_info *vinfo)
1145 slp_tree node;
1146 unsigned i;
1147 switch (this->m_ifn)
1149 case IFN_COMPLEX_MUL:
1150 case IFN_COMPLEX_MUL_CONJ:
1152 slp_tree newnode
1153 = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
1154 *this->m_node);
1155 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1157 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1158 vect_free_slp_tree (node);
1160 /* First re-arrange the children. */
1161 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1162 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1163 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1164 break;
1166 case IFN_COMPLEX_FMA:
1167 case IFN_COMPLEX_FMA_CONJ:
1169 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1170 slp_tree newnode
1171 = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
1172 *this->m_node);
1173 SLP_TREE_REF_COUNT (this->m_ops[3])++;
1175 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1176 vect_free_slp_tree (node);
1178 /* First re-arrange the children. */
1179 SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
1180 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3];
1181 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1182 SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0];
1184 /* Tell the builder to expect an extra argument. */
1185 this->m_num_args++;
1186 break;
1188 default:
1189 gcc_unreachable ();
1192 /* And then rewrite the node itself. */
1193 complex_pattern::build (vinfo);
1196 /*******************************************************************************
1197 * complex_fms_pattern class
1198 ******************************************************************************/
1200 class complex_fms_pattern : public complex_pattern
1202 protected:
1203 complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1204 : complex_pattern (node, m_ops, ifn)
1206 this->m_num_args = 3;
1209 public:
1210 void build (vec_info *) final override;
1211 static internal_fn
1212 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1213 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1215 static vect_pattern*
1216 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1217 slp_tree *);
1219 static vect_pattern*
1220 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1222 return new complex_fms_pattern (node, m_ops, ifn);
1227 /* Pattern matcher for trying to match complex multiply and subtract pattern
1228 in SLP tree. If the operation matches then IFN is set to the operation
1229 it matched and the arguments to the two replacement statements are put in
1230 m_ops.
1232 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1234 This function matches the patterns shaped as:
1236 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1237 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1239 c[i] = c[i] - ax;
1240 c[i+1] = c[i+1] + bx;
1242 If a match occurred then TRUE is returned, else FALSE. The initial match is
1243 expected to be in OP1 and the initial match operands in args0. */
1245 internal_fn
1246 complex_fms_pattern::matches (complex_operation_t op,
1247 slp_tree_to_load_perm_map_t *perm_cache,
1248 slp_compat_nodes_map_t *compat_cache,
1249 slp_tree * ref_node, vec<slp_tree> *ops)
1251 internal_fn ifn = IFN_LAST;
1253 /* We need to ignore the two_operands nodes that may also match,
1254 for that we can check if they have any scalar statements and also
1255 check that it's not a permute node as we're looking for a normal
1256 MINUS_EXPR operation. */
1257 if (op != CMPLX_NONE)
1258 return IFN_LAST;
1260 slp_tree root = *ref_node;
1261 if (!vect_match_expression_p (root, MINUS_EXPR))
1262 return IFN_LAST;
1264 /* TODO: Support invariants here, with the new layout CADD now
1265 can match before we get a chance to try CFMS. */
1266 auto nodes = SLP_TREE_CHILDREN (root);
1267 if (!vect_match_expression_p (nodes[1], MULT_EXPR)
1268 || vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
1269 return IFN_LAST;
1271 auto childs = SLP_TREE_CHILDREN (nodes[0]);
1272 auto l0node = SLP_TREE_CHILDREN (childs[0]);
1274 /* Now operand2+4 may lead to another expression. */
1275 auto_vec<slp_tree> left_op, right_op;
1276 left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1277 right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
1279 /* If these nodes don't have any children then they're
1280 not ones we're interested in. */
1281 if (left_op.length () != 2
1282 || right_op.length () != 2
1283 || !vect_match_expression_p (l0node[1], MULT_EXPR))
1284 return IFN_LAST;
1286 enum _conj_status status;
1287 if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
1288 left_op, true, &status))
1289 return IFN_LAST;
1291 if (status == CONJ_NONE)
1292 ifn = IFN_COMPLEX_FMS;
1293 else
1294 ifn = IFN_COMPLEX_FMS_CONJ;
1296 if (!vect_pattern_validate_optab (ifn, *ref_node))
1297 return IFN_LAST;
1299 ops->truncate (0);
1300 ops->create (4);
1302 complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
1303 if (kind == PERM_EVENODD)
1305 ops->quick_push (l0node[0]);
1306 ops->quick_push (right_op[0]);
1307 ops->quick_push (right_op[1]);
1308 ops->quick_push (left_op[1]);
1310 else
1312 ops->quick_push (l0node[0]);
1313 ops->quick_push (right_op[1]);
1314 ops->quick_push (right_op[0]);
1315 ops->quick_push (left_op[0]);
1318 return ifn;
1321 /* Attempt to recognize a complex mul pattern. */
1323 vect_pattern*
1324 complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1325 slp_compat_nodes_map_t *compat_cache,
1326 slp_tree *node)
1328 auto_vec<slp_tree> ops;
1329 complex_operation_t op
1330 = vect_detect_pair_op (*node, true, &ops);
1331 internal_fn ifn
1332 = complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1333 if (ifn == IFN_LAST)
1334 return NULL;
1336 return new complex_fms_pattern (node, &ops, ifn);
1339 /* Perform a replacement of the detected complex mul pattern with the new
1340 instruction sequences. */
1342 void
1343 complex_fms_pattern::build (vec_info *vinfo)
1345 slp_tree node;
1346 unsigned i;
1347 slp_tree newnode =
1348 vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1349 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1350 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1352 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1353 vect_free_slp_tree (node);
1355 SLP_TREE_CHILDREN (*this->m_node).release ();
1356 SLP_TREE_CHILDREN (*this->m_node).create (3);
1358 /* First re-arrange the children. */
1359 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1360 SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1361 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1363 /* And then rewrite the node itself. */
1364 complex_pattern::build (vinfo);
1367 /*******************************************************************************
1368 * complex_operations_pattern class
1369 ******************************************************************************/
1371 /* This function combines all the existing pattern matchers above into one class
1372 that shares the functionality between them. The initial match is shared
1373 between all complex operations. */
1375 class complex_operations_pattern : public complex_pattern
1377 protected:
1378 complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1379 internal_fn ifn)
1380 : complex_pattern (node, m_ops, ifn)
1382 this->m_num_args = 0;
1385 public:
1386 void build (vec_info *) final override;
1387 static internal_fn
1388 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1389 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1391 static vect_pattern*
1392 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1393 slp_tree *);
1396 /* Dummy matches implementation for proxy object. */
1398 internal_fn
1399 complex_operations_pattern::
1400 matches (complex_operation_t /* op */,
1401 slp_tree_to_load_perm_map_t * /* perm_cache */,
1402 slp_compat_nodes_map_t * /* compat_cache */,
1403 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1405 return IFN_LAST;
1408 /* Attempt to recognize a complex mul pattern. */
1410 vect_pattern*
1411 complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1412 slp_compat_nodes_map_t *ccache,
1413 slp_tree *node)
1415 auto_vec<slp_tree> ops;
1416 complex_operation_t op
1417 = vect_detect_pair_op (*node, true, &ops);
1418 internal_fn ifn = IFN_LAST;
1420 ifn = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops);
1421 if (ifn != IFN_LAST)
1422 return complex_fms_pattern::mkInstance (node, &ops, ifn);
1424 ifn = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops);
1425 if (ifn != IFN_LAST)
1426 return complex_mul_pattern::mkInstance (node, &ops, ifn);
1428 ifn = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops);
1429 if (ifn != IFN_LAST)
1430 return complex_add_pattern::mkInstance (node, &ops, ifn);
1432 return NULL;
1435 /* Dummy implementation of build. */
1437 void
1438 complex_operations_pattern::build (vec_info * /* vinfo */)
1440 gcc_unreachable ();
1444 /* The addsub_pattern. */
1446 class addsub_pattern : public vect_pattern
1448 public:
1449 addsub_pattern (slp_tree *node, internal_fn ifn)
1450 : vect_pattern (node, NULL, ifn) {};
1452 void build (vec_info *) final override;
1454 static vect_pattern*
1455 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1456 slp_tree *);
1459 vect_pattern *
1460 addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
1461 slp_compat_nodes_map_t *, slp_tree *node_)
1463 slp_tree node = *node_;
1464 if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
1465 || SLP_TREE_CHILDREN (node).length () != 2
1466 || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
1467 return NULL;
1469 /* Match a blend of a plus and a minus op with the same number of plus and
1470 minus lanes on the same operands. */
1471 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1472 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1473 if (l0 == l1)
1474 return NULL;
1475 bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1476 PLUS_EXPR);
1477 if (!l0add_p
1478 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1479 return NULL;
1480 bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1481 PLUS_EXPR);
1482 if (!l1add_p
1483 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
1484 return NULL;
1486 slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1487 slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1488 if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1489 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1490 || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1491 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
1492 return NULL;
1494 for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1496 std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
1497 /* It has to be alternating -, +, -,
1498 While we could permute the .ADDSUB inputs and the .ADDSUB output
1499 that's only profitable over the add + sub + blend if at least
1500 one of the permute is optimized which we can't determine here. */
1501 if (perm.first != ((i & 1) ? l1 : l0)
1502 || perm.second != i)
1503 return NULL;
1506 /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1507 (l0add_p), see whether we have FMA variants. We can only form FMAs
1508 if allowed via -ffp-contract=fast. */
1509 if (flag_fp_contract_mode != FP_CONTRACT_FAST
1510 && FLOAT_TYPE_P (SLP_TREE_VECTYPE (l0node)))
1512 else if (!l0add_p
1513 && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
1515 /* (c * d) -+ a */
1516 if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1517 return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1519 else if (l0add_p
1520 && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
1522 /* (c * d) +- a */
1523 if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1524 return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1527 if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1528 return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1530 return NULL;
1533 void
1534 addsub_pattern::build (vec_info *vinfo)
1536 slp_tree node = *m_node;
1538 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1539 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1541 switch (m_ifn)
1543 case IFN_VEC_ADDSUB:
1545 slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1546 slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1548 /* Modify the blend node in-place. */
1549 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1550 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1551 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1552 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1554 /* Build IFN_VEC_ADDSUB from the sub representative operands. */
1555 stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1556 gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1557 gimple_assign_rhs1 (rep->stmt),
1558 gimple_assign_rhs2 (rep->stmt));
1559 gimple_call_set_lhs (call, make_ssa_name
1560 (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1561 gimple_call_set_nothrow (call, true);
1562 gimple_set_bb (call, gimple_bb (rep->stmt));
1563 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
1564 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1565 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1566 STMT_SLP_TYPE (new_rep) = pure_slp;
1567 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1568 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1569 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
1570 SLP_TREE_CODE (node) = ERROR_MARK;
1571 SLP_TREE_LANE_PERMUTATION (node).release ();
1573 vect_free_slp_tree (sub);
1574 vect_free_slp_tree (add);
1575 break;
1577 case IFN_VEC_FMADDSUB:
1578 case IFN_VEC_FMSUBADD:
1580 slp_tree sub, add;
1581 if (m_ifn == IFN_VEC_FMADDSUB)
1583 sub = SLP_TREE_CHILDREN (node)[l0];
1584 add = SLP_TREE_CHILDREN (node)[l1];
1586 else /* m_ifn == IFN_VEC_FMSUBADD */
1588 sub = SLP_TREE_CHILDREN (node)[l1];
1589 add = SLP_TREE_CHILDREN (node)[l0];
1591 slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1592 /* Modify the blend node in-place. */
1593 SLP_TREE_CHILDREN (node).safe_grow (3, true);
1594 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1595 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1596 SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1597 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1598 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1599 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1601 /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */
1602 stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1603 stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1604 gcall *call = gimple_build_call_internal (m_ifn, 3,
1605 gimple_assign_rhs1 (mrep->stmt),
1606 gimple_assign_rhs2 (mrep->stmt),
1607 gimple_assign_rhs2 (srep->stmt));
1608 gimple_call_set_lhs (call, make_ssa_name
1609 (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
1610 gimple_call_set_nothrow (call, true);
1611 gimple_set_bb (call, gimple_bb (srep->stmt));
1612 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
1613 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1614 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1615 STMT_SLP_TYPE (new_rep) = pure_slp;
1616 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1617 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1618 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
1619 SLP_TREE_CODE (node) = ERROR_MARK;
1620 SLP_TREE_LANE_PERMUTATION (node).release ();
1622 vect_free_slp_tree (sub);
1623 vect_free_slp_tree (add);
1624 break;
1626 default:;
1630 /*******************************************************************************
1631 * Pattern matching definitions
1632 ******************************************************************************/
1634 #define SLP_PATTERN(x) &x::recognize
1635 vect_pattern_decl_t slp_patterns[]
1637 /* For least amount of back-tracking and more efficient matching
1638 order patterns from the largest to the smallest. Especially if they
1639 overlap in what they can detect. */
1641 SLP_PATTERN (complex_operations_pattern),
1642 SLP_PATTERN (addsub_pattern)
1644 #undef SLP_PATTERN
1646 /* Set the number of SLP pattern matchers available. */
1647 size_t num__slp_patterns = ARRAY_SIZE (slp_patterns);