Daily bump.
[official-gcc.git] / gcc / tree-vect-slp-patterns.cc
blob8adae8a6ec0d0f3e6a3da9040a089190909a50fe
1 /* SLP - Pattern matcher on SLP trees
2 Copyright (C) 2020-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #define INCLUDE_MEMORY
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "gimple-iterator.h"
37 #include "cfgloop.h"
38 #include "tree-vectorizer.h"
39 #include "langhooks.h"
40 #include "gimple-walk.h"
41 #include "dbgcnt.h"
42 #include "tree-vector-builder.h"
43 #include "vec-perm-indices.h"
44 #include "gimple-fold.h"
45 #include "internal-fn.h"
47 /* SLP Pattern matching mechanism.
49 This extension to the SLP vectorizer allows one to transform the generated SLP
50 tree based on any pattern. The difference between this and the normal vect
51 pattern matcher is that unlike the former, this matcher allows you to match
52 with instructions that do not belong to the same SSA dominator graph.
54 The only requirement that this pattern matcher has is that you are only
55 only allowed to either match an entire group or none.
57 The pattern matcher currently only allows you to perform replacements to
58 internal functions.
60 Once the patterns are matched it is one way, these cannot be undone. It is
61 currently not supported to match patterns recursively.
63 To add a new pattern, implement the vect_pattern class and add the type to
64 slp_patterns.
68 /*******************************************************************************
69 * vect_pattern class
70 ******************************************************************************/
72 /* Default implementation of recognize that performs matching, validation and
73 replacement of nodes but that can be overriden if required. */
75 static bool
76 vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
78 tree vectype = SLP_TREE_VECTYPE (node);
79 if (ifn == IFN_LAST || !vectype)
80 return false;
82 if (dump_enabled_p ())
83 dump_printf_loc (MSG_NOTE, vect_location,
84 "Found %s pattern in SLP tree\n",
85 internal_fn_name (ifn));
87 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
89 if (dump_enabled_p ())
90 dump_printf_loc (MSG_NOTE, vect_location,
91 "Target supports %s vectorization with mode %T\n",
92 internal_fn_name (ifn), vectype);
94 else
96 if (dump_enabled_p ())
98 if (!vectype)
99 dump_printf_loc (MSG_NOTE, vect_location,
100 "Target does not support vector type for %G\n",
101 STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
102 else
103 dump_printf_loc (MSG_NOTE, vect_location,
104 "Target does not support %s for vector type "
105 "%T\n", internal_fn_name (ifn), vectype);
107 return false;
109 return true;
112 /*******************************************************************************
113 * General helper types
114 ******************************************************************************/
116 /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
117 be matched when looking for expressions that we are interested matching for
118 complex numbers addition and mla. */
120 typedef enum _complex_operation : unsigned {
121 PLUS_PLUS,
122 MINUS_PLUS,
123 PLUS_MINUS,
124 MULT_MULT,
125 CMPLX_NONE
126 } complex_operation_t;
128 /*******************************************************************************
129 * General helper functions
130 ******************************************************************************/
132 /* Helper function of linear_loads_p that checks to see if the load permutation
133 is sequential and in monotonically increasing order of loads with no gaps.
136 static inline complex_perm_kinds_t
137 is_linear_load_p (load_permutation_t loads)
139 if (loads.length() == 0)
140 return PERM_UNKNOWN;
142 unsigned load, i;
143 complex_perm_kinds_t candidates[4]
144 = { PERM_ODDODD
145 , PERM_EVENEVEN
146 , PERM_EVENODD
147 , PERM_ODDEVEN
150 int valid_patterns = 4;
151 FOR_EACH_VEC_ELT (loads, i, load)
153 unsigned adj_load = load % 2;
154 if (candidates[0] != PERM_UNKNOWN && adj_load != 1)
156 candidates[0] = PERM_UNKNOWN;
157 valid_patterns--;
159 if (candidates[1] != PERM_UNKNOWN && adj_load != 0)
161 candidates[1] = PERM_UNKNOWN;
162 valid_patterns--;
164 if (candidates[2] != PERM_UNKNOWN && load != i)
166 candidates[2] = PERM_UNKNOWN;
167 valid_patterns--;
169 if (candidates[3] != PERM_UNKNOWN
170 && load != (i % 2 == 0 ? i + 1 : i - 1))
172 candidates[3] = PERM_UNKNOWN;
173 valid_patterns--;
176 if (valid_patterns == 0)
177 return PERM_UNKNOWN;
180 for (i = 0; i < sizeof(candidates); i++)
181 if (candidates[i] != PERM_UNKNOWN)
182 return candidates[i];
184 return PERM_UNKNOWN;
187 /* Combine complex_perm_kinds A and B into a new permute kind that describes the
188 resulting operation. */
190 static inline complex_perm_kinds_t
191 vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
193 if (a == b)
194 return a;
196 if (a == PERM_TOP)
197 return b;
199 if (b == PERM_TOP)
200 return a;
202 return PERM_UNKNOWN;
205 /* Check to see if all loads rooted in ROOT are linear. Linearity is
206 defined as having no gaps between values loaded. */
208 static complex_perm_kinds_t
209 linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
211 if (!root)
212 return PERM_UNKNOWN;
214 unsigned i;
215 complex_perm_kinds_t *tmp;
217 if ((tmp = perm_cache->get (root)) != NULL)
218 return *tmp;
220 complex_perm_kinds_t retval = PERM_UNKNOWN;
221 perm_cache->put (root, retval);
223 /* If it's a load node, then just read the load permute. */
224 if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
226 retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
227 perm_cache->put (root, retval);
228 return retval;
230 else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
232 retval = PERM_TOP;
233 perm_cache->put (root, retval);
234 return retval;
237 complex_perm_kinds_t kind = PERM_TOP;
239 slp_tree child;
240 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
242 complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
243 kind = vect_merge_perms (kind, res);
244 /* Unknown and Top are not valid on blends as they produce no permute. */
245 retval = kind;
246 if (kind == PERM_UNKNOWN || kind == PERM_TOP)
247 return retval;
250 retval = kind;
252 perm_cache->put (root, retval);
253 return retval;
257 /* This function attempts to make a node rooted in NODE is linear. If the node
258 if already linear than the node itself is returned in RESULT.
260 If the node is not linear then a new VEC_PERM_EXPR node is created with a
261 lane permute that when applied will make the node linear. If such a
262 permute cannot be created then FALSE is returned from the function.
264 Here linearity is defined as having a sequential, monotically increasing
265 load position inside the load permute generated by the loads reachable from
266 NODE. */
268 static slp_tree
269 vect_build_swap_evenodd_node (slp_tree node)
271 /* Attempt to linearise the permute. */
272 vec<std::pair<unsigned, unsigned> > zipped;
273 zipped.create (SLP_TREE_LANES (node));
275 for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
277 zipped.quick_push (std::make_pair (0, x+1));
278 zipped.quick_push (std::make_pair (0, x));
281 /* Create the new permute node and store it instead. */
282 slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
283 SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
284 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
285 SLP_TREE_CHILDREN (vnode).quick_push (node);
286 SLP_TREE_REF_COUNT (vnode) = 1;
287 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
288 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
289 SLP_TREE_REF_COUNT (node)++;
290 return vnode;
293 /* Checks to see of the expression represented by NODE is a gimple assign with
294 code CODE. */
296 static inline bool
297 vect_match_expression_p (slp_tree node, tree_code code)
299 if (!node
300 || !SLP_TREE_REPRESENTATIVE (node))
301 return false;
303 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
304 if (!is_gimple_assign (expr)
305 || gimple_assign_rhs_code (expr) != code)
306 return false;
308 return true;
311 /* Check if the given lane permute in PERMUTES matches an alternating sequence
312 of {even odd even odd ...}. This to account for unrolled loops. Further
313 mode there resulting permute must be linear. */
315 static inline bool
316 vect_check_evenodd_blend (lane_permutation_t &permutes,
317 unsigned even, unsigned odd)
319 if (permutes.length () == 0
320 || permutes.length () % 2 != 0)
321 return false;
323 unsigned val[2] = {even, odd};
324 unsigned seed = 0;
325 for (unsigned i = 0; i < permutes.length (); i++)
326 if (permutes[i].first != val[i % 2]
327 || permutes[i].second != seed++)
328 return false;
330 return true;
333 /* This function will match the two gimple expressions representing NODE1 and
334 NODE2 in parallel and returns the pair operation that represents the two
335 expressions in the two statements.
337 If match is successful then the corresponding complex_operation is
338 returned and the arguments to the two matched operations are returned in OPS.
340 If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
341 from the two nodes alternatingly.
343 If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
345 e.g. the following gimple statements
347 stmt 0 _39 = _37 + _12;
348 stmt 1 _6 = _38 - _36;
350 will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
353 static complex_operation_t
354 vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
355 bool two_operands = true, vec<slp_tree> *ops = NULL)
357 complex_operation_t result = CMPLX_NONE;
359 if (vect_match_expression_p (node1, MINUS_EXPR)
360 && vect_match_expression_p (node2, PLUS_EXPR)
361 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
362 result = MINUS_PLUS;
363 else if (vect_match_expression_p (node1, PLUS_EXPR)
364 && vect_match_expression_p (node2, MINUS_EXPR)
365 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
366 result = PLUS_MINUS;
367 else if (vect_match_expression_p (node1, PLUS_EXPR)
368 && vect_match_expression_p (node2, PLUS_EXPR))
369 result = PLUS_PLUS;
370 else if (vect_match_expression_p (node1, MULT_EXPR)
371 && vect_match_expression_p (node2, MULT_EXPR))
372 result = MULT_MULT;
374 if (result != CMPLX_NONE && ops != NULL)
376 if (two_operands)
378 auto l0node = SLP_TREE_CHILDREN (node1);
379 auto l1node = SLP_TREE_CHILDREN (node2);
381 /* Check if the tree is connected as we expect it. */
382 if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
383 || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
384 return CMPLX_NONE;
386 ops->safe_push (node1);
387 ops->safe_push (node2);
389 return result;
392 /* Overload of vect_detect_pair_op that matches against the representative
393 statements in the children of NODE. It is expected that NODE has exactly
394 two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
396 static complex_operation_t
397 vect_detect_pair_op (slp_tree node, bool two_operands = true,
398 vec<slp_tree> *ops = NULL)
400 if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
401 return CMPLX_NONE;
403 if (SLP_TREE_CHILDREN (node).length () != 2)
404 return CMPLX_NONE;
406 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
407 lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
409 return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
410 ops);
413 /*******************************************************************************
414 * complex_pattern class
415 ******************************************************************************/
417 /* SLP Complex Numbers pattern matching.
419 As an example, the following simple loop:
421 double a[restrict N]; double b[restrict N]; double c[restrict N];
423 for (int i=0; i < N; i+=2)
425 c[i] = a[i] - b[i+1];
426 c[i+1] = a[i+1] + b[i];
429 which represents a complex addition on with a rotation of 90* around the
430 argand plane. i.e. if `a` and `b` were complex numbers then this would be the
431 same as `a + (b * I)`.
433 Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
434 both recognized in order for the pattern to work. As an SLP tree this is
435 represented as
437 +--------------------------------+
438 | stmt 0 *_9 = _10; |
439 | stmt 1 *_15 = _16; |
440 +--------------------------------+
444 +--------------------------------+
445 | stmt 0 _10 = _4 - _8; |
446 | stmt 1 _16 = _12 + _14; |
447 | lane permutation { 0[0] 1[1] } |
448 +--------------------------------+
452 +-----+ | | +-----+
453 | | | | | |
454 +-----| { } |<-----+ +----->| { } --------+
455 | | | +------------------| | |
456 | +-----+ | +-----+ |
457 | | | |
458 | | | |
459 | +------|------------------+ |
460 | | | |
461 v v v v
462 +--------------------------+ +--------------------------------+
463 | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
464 | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
465 | load permutation { 1 0 } | | load permutation { 0 1 } |
466 +--------------------------+ +--------------------------------+
468 The pattern matcher allows you to replace both statements 0 and 1 or none at
469 all. Because this operation is a two operands operation the actual nodes
470 being replaced are those in the { } nodes. The actual scalar statements
471 themselves are not replaced or used during the matching but instead the
472 SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
473 replace and match on any number of nodes.
475 Because the pattern matcher matches on the representative statement for the
476 SLP node the case of two_operators it allows you to match the children of the
477 node. This is done using the method `recognize ()`.
481 /* The complex_pattern class contains common code for pattern matchers that work
482 on complex numbers. These provide functionality to allow de-construction and
483 validation of sequences depicting/transforming REAL and IMAG pairs. */
485 class complex_pattern : public vect_pattern
487 protected:
488 auto_vec<slp_tree> m_workset;
489 complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
490 : vect_pattern (node, m_ops, ifn)
492 this->m_workset.safe_push (*node);
495 public:
496 void build (vec_info *) override;
498 static internal_fn
499 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
500 vec<slp_tree> *);
503 /* Create a replacement pattern statement for each node in m_node and inserts
504 the new statement into m_node as the new representative statement. The old
505 statement is marked as being in a pattern defined by the new statement. The
506 statement is created as call to internal function IFN with m_num_args
507 arguments.
509 Futhermore the new pattern is also added to the vectorization information
510 structure VINFO and the old statement STMT_INFO is marked as unused while
511 the new statement is marked as used and the number of SLP uses of the new
512 statement is incremented.
514 The newly created SLP nodes are marked as SLP only and will be dissolved
515 if SLP is aborted.
517 The newly created gimple call is returned and the BB remains unchanged.
519 This default method is designed to only match against simple operands where
520 all the input and output types are the same.
523 void
524 complex_pattern::build (vec_info *vinfo)
526 stmt_vec_info stmt_info;
528 auto_vec<tree> args;
529 args.create (this->m_num_args);
530 args.quick_grow_cleared (this->m_num_args);
531 slp_tree node;
532 unsigned ix;
533 stmt_vec_info call_stmt_info;
534 gcall *call_stmt = NULL;
536 /* Now modify the nodes themselves. */
537 FOR_EACH_VEC_ELT (this->m_workset, ix, node)
539 /* Calculate the location of the statement in NODE to replace. */
540 stmt_info = SLP_TREE_REPRESENTATIVE (node);
541 stmt_vec_info reduc_def
542 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
543 gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
544 tree lhs_old_stmt = gimple_get_lhs (old_stmt);
545 tree type = TREE_TYPE (lhs_old_stmt);
547 /* Create the argument set for use by gimple_build_call_internal_vec. */
548 for (unsigned i = 0; i < this->m_num_args; i++)
549 args[i] = lhs_old_stmt;
551 /* Create the new pattern statements. */
552 call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
553 tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
554 gimple_call_set_lhs (call_stmt, var);
555 gimple_set_location (call_stmt, gimple_location (old_stmt));
556 gimple_call_set_nothrow (call_stmt, true);
558 /* Adjust the book-keeping for the new and old statements for use during
559 SLP. This is required to get the right VF and statement during SLP
560 analysis. These changes are created after relevancy has been set for
561 the nodes as such we need to manually update them. Any changes will be
562 undone if SLP is cancelled. */
563 call_stmt_info
564 = vinfo->add_pattern_stmt (call_stmt, stmt_info);
566 /* Make sure to mark the representative statement pure_slp and
567 relevant and transfer reduction info. */
568 STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
569 STMT_SLP_TYPE (call_stmt_info) = pure_slp;
570 STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
572 gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
573 STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
574 STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
576 /* Since we are replacing all the statements in the group with the same
577 thing it doesn't really matter. So just set it every time a new stmt
578 is created. */
579 SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
580 SLP_TREE_LANE_PERMUTATION (node).release ();
581 SLP_TREE_CODE (node) = CALL_EXPR;
585 /*******************************************************************************
586 * complex_add_pattern class
587 ******************************************************************************/
589 class complex_add_pattern : public complex_pattern
591 protected:
592 complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
593 : complex_pattern (node, m_ops, ifn)
595 this->m_num_args = 2;
598 public:
599 void build (vec_info *) final override;
600 static internal_fn
601 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
602 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
604 static vect_pattern*
605 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
606 slp_tree *);
608 static vect_pattern*
609 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
611 return new complex_add_pattern (node, m_ops, ifn);
615 /* Perform a replacement of the detected complex add pattern with the new
616 instruction sequences. */
618 void
619 complex_add_pattern::build (vec_info *vinfo)
621 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
623 slp_tree node = this->m_ops[0];
624 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
626 /* First re-arrange the children. */
627 SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
628 SLP_TREE_CHILDREN (*this->m_node)[1] =
629 vect_build_swap_evenodd_node (children[1]);
631 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
632 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
633 vect_free_slp_tree (this->m_ops[0]);
634 vect_free_slp_tree (this->m_ops[1]);
636 complex_pattern::build (vinfo);
639 /* Pattern matcher for trying to match complex addition pattern in SLP tree.
641 If no match is found then IFN is set to IFN_LAST.
642 This function matches the patterns shaped as:
644 c[i] = a[i] - b[i+1];
645 c[i+1] = a[i+1] + b[i];
647 If a match occurred then TRUE is returned, else FALSE. The initial match is
648 expected to be in OP1 and the initial match operands in args0. */
650 internal_fn
651 complex_add_pattern::matches (complex_operation_t op,
652 slp_tree_to_load_perm_map_t *perm_cache,
653 slp_compat_nodes_map_t * /* compat_cache */,
654 slp_tree *node, vec<slp_tree> *ops)
656 internal_fn ifn = IFN_LAST;
658 /* Find the two components. Rotation in the complex plane will modify
659 the operations:
661 * Rotation 0: + +
662 * Rotation 90: - +
663 * Rotation 180: - -
664 * Rotation 270: + -
666 Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
667 to care about them here. */
668 if (op == MINUS_PLUS)
669 ifn = IFN_COMPLEX_ADD_ROT90;
670 else if (op == PLUS_MINUS)
671 ifn = IFN_COMPLEX_ADD_ROT270;
672 else
673 return ifn;
675 /* verify that there is a permute, otherwise this isn't a pattern we
676 we support. */
677 gcc_assert (ops->length () == 2);
679 vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
681 /* First node must be unpermuted. */
682 if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
683 return IFN_LAST;
685 /* Second node must be permuted. */
686 if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
687 return IFN_LAST;
689 if (!vect_pattern_validate_optab (ifn, *node))
690 return IFN_LAST;
692 return ifn;
695 /* Attempt to recognize a complex add pattern. */
697 vect_pattern*
698 complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
699 slp_compat_nodes_map_t *compat_cache,
700 slp_tree *node)
702 auto_vec<slp_tree> ops;
703 complex_operation_t op
704 = vect_detect_pair_op (*node, true, &ops);
705 internal_fn ifn
706 = complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops);
707 if (ifn == IFN_LAST)
708 return NULL;
710 return new complex_add_pattern (node, &ops, ifn);
713 /*******************************************************************************
714 * complex_mul_pattern
715 ******************************************************************************/
717 /* Helper function to check if PERM is KIND or PERM_TOP. */
719 static inline bool
720 is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache,
721 slp_tree op1, complex_perm_kinds_t kind1,
722 slp_tree op2, complex_perm_kinds_t kind2)
724 complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1);
725 if (perm1 != kind1 && perm1 != PERM_TOP)
726 return false;
728 complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2);
729 if (perm2 != kind2 && perm2 != PERM_TOP)
730 return false;
732 return true;
735 enum _conj_status { CONJ_NONE, CONJ_FST, CONJ_SND };
737 static inline bool
738 compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
739 slp_tree a, int *pa, slp_tree b, int *pb)
741 bool *tmp;
742 std::pair<slp_tree, slp_tree> key = std::make_pair(a, b);
743 if ((tmp = compat_cache->get (key)) != NULL)
744 return *tmp;
746 compat_cache->put (key, false);
748 if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ())
749 return false;
751 if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b))
752 return false;
754 /* Only internal nodes can be loads, as such we can't check further if they
755 are externals. */
756 if (SLP_TREE_DEF_TYPE (a) != vect_internal_def)
758 for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++)
760 tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]];
761 tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]];
762 if (!operand_equal_p (op1, op2, 0))
763 return false;
766 compat_cache->put (key, true);
767 return true;
770 auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a));
771 auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b));
773 if (gimple_code (a_stmt) != gimple_code (b_stmt))
774 return false;
776 /* code, children, type, externals, loads, constants */
777 if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt))
778 return false;
780 /* At this point, a and b are known to be the same gimple operations. */
781 if (is_gimple_call (a_stmt))
783 if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
784 dyn_cast <gcall *> (b_stmt)))
785 return false;
787 else if (!is_gimple_assign (a_stmt))
788 return false;
789 else
791 tree_code acode = gimple_assign_rhs_code (a_stmt);
792 tree_code bcode = gimple_assign_rhs_code (b_stmt);
793 if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR)
794 && (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR))
795 return true;
797 if (acode != bcode)
798 return false;
801 if (!SLP_TREE_LOAD_PERMUTATION (a).exists ()
802 || !SLP_TREE_LOAD_PERMUTATION (b).exists ())
804 for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
806 tree t1 = gimple_arg (a_stmt, i);
807 tree t2 = gimple_arg (b_stmt, i);
808 if (TREE_CODE (t1) != TREE_CODE (t2))
809 return false;
811 /* If SSA name then we will need to inspect the children
812 so we can punt here. */
813 if (TREE_CODE (t1) == SSA_NAME)
814 continue;
816 if (!operand_equal_p (t1, t2, 0))
817 return false;
820 else
822 auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a));
823 auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b));
824 /* Don't check the last dimension as that's checked by the lineary
825 checks. This check is also much stricter than what we need
826 because it doesn't consider loading from adjacent elements
827 in the same struct as loading from the same base object.
828 But for now, I'll play it safe. */
829 if (!same_data_refs (dr1, dr2, 1))
830 return false;
833 for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++)
835 if (!compatible_complex_nodes_p (compat_cache,
836 SLP_TREE_CHILDREN (a)[i], pa,
837 SLP_TREE_CHILDREN (b)[i], pb))
838 return false;
841 compat_cache->put (key, true);
842 return true;
845 static inline bool
846 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
847 slp_compat_nodes_map_t *compat_cache,
848 vec<slp_tree> &left_op,
849 vec<slp_tree> &right_op,
850 bool subtract,
851 enum _conj_status *_status)
853 auto_vec<slp_tree> ops;
854 enum _conj_status stats = CONJ_NONE;
856 /* The complex operations can occur in two layouts and two permute sequences
857 so declare them and re-use them. */
858 int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}. */
859 , { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}. */
862 /* Now for the corresponding permutes that go with these values. */
863 complex_perm_kinds_t perms[][4]
864 = { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN }
865 , { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD }
868 /* These permutes are used during comparisons of externals on which
869 we require strict equality. */
870 int cq[][4][2]
871 = { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } }
872 , { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } }
875 /* Default to style and perm 0, most operations use this one. */
876 int style = 0;
877 int perm = subtract ? 1 : 0;
879 /* Check if we have a negate operation, if so absorb the node and continue
880 looking. */
881 bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR);
882 bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR);
884 /* Determine which style we're looking at. We only have different ones
885 whenever a conjugate is involved. */
886 if (neg0 && neg1)
888 else if (neg0)
890 right_op[0] = SLP_TREE_CHILDREN (right_op[0])[0];
891 stats = CONJ_FST;
892 if (subtract)
893 perm = 0;
895 else if (neg1)
897 right_op[1] = SLP_TREE_CHILDREN (right_op[1])[0];
898 stats = CONJ_SND;
899 perm = 1;
902 *_status = stats;
904 /* Flatten the inputs after we've remapped them. */
905 ops.create (4);
906 ops.safe_splice (left_op);
907 ops.safe_splice (right_op);
909 /* Extract out the elements to check. */
910 slp_tree op0 = ops[styles[style][0]];
911 slp_tree op1 = ops[styles[style][1]];
912 slp_tree op2 = ops[styles[style][2]];
913 slp_tree op3 = ops[styles[style][3]];
915 /* Do cheapest test first. If failed no need to analyze further. */
916 if (linear_loads_p (perm_cache, op0) != perms[perm][0]
917 || linear_loads_p (perm_cache, op1) != perms[perm][1]
918 || !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3]))
919 return false;
921 return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1,
922 cq[perm][1])
923 && compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3,
924 cq[perm][3]);
927 /* This function combines two nodes containing only even and only odd lanes
928 together into a single node which contains the nodes in even/odd order
929 by using a lane permute.
931 The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
932 So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
934 The tree REPRESENTATION is taken from the supplied REP along with the
935 vectype which must be the same between all three nodes.
938 static slp_tree
939 vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
941 vec<std::pair<unsigned, unsigned> > perm;
942 perm.create (SLP_TREE_LANES (rep));
944 for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
946 perm.quick_push (std::make_pair (0, x));
947 perm.quick_push (std::make_pair (1, x+1));
950 slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
951 SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
952 SLP_TREE_LANE_PERMUTATION (vnode) = perm;
954 SLP_TREE_CHILDREN (vnode).create (2);
955 SLP_TREE_CHILDREN (vnode).quick_push (even);
956 SLP_TREE_CHILDREN (vnode).quick_push (odd);
957 SLP_TREE_REF_COUNT (even)++;
958 SLP_TREE_REF_COUNT (odd)++;
959 SLP_TREE_REF_COUNT (vnode) = 1;
961 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
962 gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
963 /* Representation is set to that of the current node as the vectorizer
964 can't deal with VEC_PERMs with no representation, as would be the
965 case with invariants. */
966 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
967 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
968 return vnode;
971 class complex_mul_pattern : public complex_pattern
973 protected:
974 complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
975 : complex_pattern (node, m_ops, ifn)
977 this->m_num_args = 2;
980 public:
981 void build (vec_info *) final override;
982 static internal_fn
983 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
984 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
986 static vect_pattern*
987 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
988 slp_tree *);
990 static vect_pattern*
991 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
993 return new complex_mul_pattern (node, m_ops, ifn);
998 /* Pattern matcher for trying to match complex multiply and complex multiply
999 and accumulate pattern in SLP tree. If the operation matches then IFN
1000 is set to the operation it matched and the arguments to the two
1001 replacement statements are put in m_ops.
1003 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1005 This function matches the patterns shaped as:
1007 double ax = (b[i+1] * a[i]);
1008 double bx = (a[i+1] * b[i]);
1010 c[i] = c[i] - ax;
1011 c[i+1] = c[i+1] + bx;
1013 If a match occurred then TRUE is returned, else FALSE. The initial match is
1014 expected to be in OP1 and the initial match operands in args0. */
1016 internal_fn
1017 complex_mul_pattern::matches (complex_operation_t op,
1018 slp_tree_to_load_perm_map_t *perm_cache,
1019 slp_compat_nodes_map_t *compat_cache,
1020 slp_tree *node, vec<slp_tree> *ops)
1022 internal_fn ifn = IFN_LAST;
1024 if (op != MINUS_PLUS)
1025 return IFN_LAST;
1027 auto childs = *ops;
1028 auto l0node = SLP_TREE_CHILDREN (childs[0]);
1030 bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
1031 bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
1032 if (!mul0 && !mul1)
1033 return IFN_LAST;
1035 /* Now operand2+4 may lead to another expression. */
1036 auto_vec<slp_tree> left_op, right_op;
1037 slp_tree add0 = NULL;
1039 /* Check if we may be a multiply add. It's only valid to form FMAs
1040 with -ffp-contract=fast. */
1041 if (!mul0
1042 && (flag_fp_contract_mode == FP_CONTRACT_FAST
1043 || !FLOAT_TYPE_P (SLP_TREE_VECTYPE (*node)))
1044 && vect_match_expression_p (l0node[0], PLUS_EXPR))
1046 auto vals = SLP_TREE_CHILDREN (l0node[0]);
1047 /* Check if it's a multiply, otherwise no idea what this is. */
1048 if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
1049 return IFN_LAST;
1051 /* Check if the ADD is linear, otherwise it's not valid complex FMA. */
1052 if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
1053 return IFN_LAST;
1055 left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
1056 add0 = vals[0];
1058 else
1059 left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
1061 right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1063 if (left_op.length () != 2
1064 || right_op.length () != 2
1065 || !mul0
1066 || !mul1
1067 || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
1068 return IFN_LAST;
1070 enum _conj_status status;
1071 if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
1072 right_op, false, &status))
1073 return IFN_LAST;
1075 if (status == CONJ_NONE)
1077 if (add0)
1078 ifn = IFN_COMPLEX_FMA;
1079 else
1080 ifn = IFN_COMPLEX_MUL;
1082 else
1084 if(add0)
1085 ifn = IFN_COMPLEX_FMA_CONJ;
1086 else
1087 ifn = IFN_COMPLEX_MUL_CONJ;
1090 if (!vect_pattern_validate_optab (ifn, *node))
1091 return IFN_LAST;
1093 ops->truncate (0);
1094 ops->create (add0 ? 4 : 3);
1096 if (add0)
1097 ops->quick_push (add0);
1099 complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
1100 if (kind == PERM_EVENODD || kind == PERM_TOP)
1102 ops->quick_push (left_op[1]);
1103 ops->quick_push (right_op[1]);
1104 ops->quick_push (left_op[0]);
1106 else if (kind == PERM_EVENEVEN && status != CONJ_SND)
1108 ops->quick_push (left_op[0]);
1109 ops->quick_push (right_op[0]);
1110 ops->quick_push (left_op[1]);
1112 else
1114 ops->quick_push (left_op[0]);
1115 ops->quick_push (right_op[1]);
1116 ops->quick_push (left_op[1]);
1119 return ifn;
1122 /* Attempt to recognize a complex mul pattern. */
1124 vect_pattern*
1125 complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1126 slp_compat_nodes_map_t *compat_cache,
1127 slp_tree *node)
1129 auto_vec<slp_tree> ops;
1130 complex_operation_t op
1131 = vect_detect_pair_op (*node, true, &ops);
1132 internal_fn ifn
1133 = complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1134 if (ifn == IFN_LAST)
1135 return NULL;
1137 return new complex_mul_pattern (node, &ops, ifn);
1140 /* Perform a replacement of the detected complex mul pattern with the new
1141 instruction sequences. */
1143 void
1144 complex_mul_pattern::build (vec_info *vinfo)
1146 slp_tree node;
1147 unsigned i;
1148 switch (this->m_ifn)
1150 case IFN_COMPLEX_MUL:
1151 case IFN_COMPLEX_MUL_CONJ:
1153 slp_tree newnode
1154 = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
1155 *this->m_node);
1156 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1158 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1159 vect_free_slp_tree (node);
1161 /* First re-arrange the children. */
1162 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1163 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1164 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1165 break;
1167 case IFN_COMPLEX_FMA:
1168 case IFN_COMPLEX_FMA_CONJ:
1170 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1171 slp_tree newnode
1172 = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
1173 *this->m_node);
1174 SLP_TREE_REF_COUNT (this->m_ops[3])++;
1176 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1177 vect_free_slp_tree (node);
1179 /* First re-arrange the children. */
1180 SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
1181 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3];
1182 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1183 SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0];
1185 /* Tell the builder to expect an extra argument. */
1186 this->m_num_args++;
1187 break;
1189 default:
1190 gcc_unreachable ();
1193 /* And then rewrite the node itself. */
1194 complex_pattern::build (vinfo);
1197 /*******************************************************************************
1198 * complex_fms_pattern class
1199 ******************************************************************************/
1201 class complex_fms_pattern : public complex_pattern
1203 protected:
1204 complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1205 : complex_pattern (node, m_ops, ifn)
1207 this->m_num_args = 3;
1210 public:
1211 void build (vec_info *) final override;
1212 static internal_fn
1213 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1214 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1216 static vect_pattern*
1217 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1218 slp_tree *);
1220 static vect_pattern*
1221 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1223 return new complex_fms_pattern (node, m_ops, ifn);
1228 /* Pattern matcher for trying to match complex multiply and subtract pattern
1229 in SLP tree. If the operation matches then IFN is set to the operation
1230 it matched and the arguments to the two replacement statements are put in
1231 m_ops.
1233 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1235 This function matches the patterns shaped as:
1237 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1238 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1240 c[i] = c[i] - ax;
1241 c[i+1] = c[i+1] + bx;
1243 If a match occurred then TRUE is returned, else FALSE. The initial match is
1244 expected to be in OP1 and the initial match operands in args0. */
1246 internal_fn
1247 complex_fms_pattern::matches (complex_operation_t op,
1248 slp_tree_to_load_perm_map_t *perm_cache,
1249 slp_compat_nodes_map_t *compat_cache,
1250 slp_tree * ref_node, vec<slp_tree> *ops)
1252 internal_fn ifn = IFN_LAST;
1254 /* We need to ignore the two_operands nodes that may also match,
1255 for that we can check if they have any scalar statements and also
1256 check that it's not a permute node as we're looking for a normal
1257 MINUS_EXPR operation. */
1258 if (op != CMPLX_NONE)
1259 return IFN_LAST;
1261 slp_tree root = *ref_node;
1262 if (!vect_match_expression_p (root, MINUS_EXPR))
1263 return IFN_LAST;
1265 /* TODO: Support invariants here, with the new layout CADD now
1266 can match before we get a chance to try CFMS. */
1267 auto nodes = SLP_TREE_CHILDREN (root);
1268 if (!vect_match_expression_p (nodes[1], MULT_EXPR)
1269 || vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
1270 return IFN_LAST;
1272 auto childs = SLP_TREE_CHILDREN (nodes[0]);
1273 auto l0node = SLP_TREE_CHILDREN (childs[0]);
1275 /* Now operand2+4 may lead to another expression. */
1276 auto_vec<slp_tree> left_op, right_op;
1277 left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1278 right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
1280 /* If these nodes don't have any children then they're
1281 not ones we're interested in. */
1282 if (left_op.length () != 2
1283 || right_op.length () != 2
1284 || !vect_match_expression_p (l0node[1], MULT_EXPR))
1285 return IFN_LAST;
1287 enum _conj_status status;
1288 if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
1289 left_op, true, &status))
1290 return IFN_LAST;
1292 if (status == CONJ_NONE)
1293 ifn = IFN_COMPLEX_FMS;
1294 else
1295 ifn = IFN_COMPLEX_FMS_CONJ;
1297 if (!vect_pattern_validate_optab (ifn, *ref_node))
1298 return IFN_LAST;
1300 ops->truncate (0);
1301 ops->create (4);
1303 complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
1304 if (kind == PERM_EVENODD)
1306 ops->quick_push (l0node[0]);
1307 ops->quick_push (right_op[0]);
1308 ops->quick_push (right_op[1]);
1309 ops->quick_push (left_op[1]);
1311 else
1313 ops->quick_push (l0node[0]);
1314 ops->quick_push (right_op[1]);
1315 ops->quick_push (right_op[0]);
1316 ops->quick_push (left_op[0]);
1319 return ifn;
1322 /* Attempt to recognize a complex mul pattern. */
1324 vect_pattern*
1325 complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1326 slp_compat_nodes_map_t *compat_cache,
1327 slp_tree *node)
1329 auto_vec<slp_tree> ops;
1330 complex_operation_t op
1331 = vect_detect_pair_op (*node, true, &ops);
1332 internal_fn ifn
1333 = complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1334 if (ifn == IFN_LAST)
1335 return NULL;
1337 return new complex_fms_pattern (node, &ops, ifn);
1340 /* Perform a replacement of the detected complex mul pattern with the new
1341 instruction sequences. */
1343 void
1344 complex_fms_pattern::build (vec_info *vinfo)
1346 slp_tree node;
1347 unsigned i;
1348 slp_tree newnode =
1349 vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1350 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1351 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1353 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1354 vect_free_slp_tree (node);
1356 SLP_TREE_CHILDREN (*this->m_node).release ();
1357 SLP_TREE_CHILDREN (*this->m_node).create (3);
1359 /* First re-arrange the children. */
1360 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1361 SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1362 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1364 /* And then rewrite the node itself. */
1365 complex_pattern::build (vinfo);
1368 /*******************************************************************************
1369 * complex_operations_pattern class
1370 ******************************************************************************/
1372 /* This function combines all the existing pattern matchers above into one class
1373 that shares the functionality between them. The initial match is shared
1374 between all complex operations. */
1376 class complex_operations_pattern : public complex_pattern
1378 protected:
1379 complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1380 internal_fn ifn)
1381 : complex_pattern (node, m_ops, ifn)
1383 this->m_num_args = 0;
1386 public:
1387 void build (vec_info *) final override;
1388 static internal_fn
1389 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1390 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1392 static vect_pattern*
1393 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1394 slp_tree *);
1397 /* Dummy matches implementation for proxy object. */
1399 internal_fn
1400 complex_operations_pattern::
1401 matches (complex_operation_t /* op */,
1402 slp_tree_to_load_perm_map_t * /* perm_cache */,
1403 slp_compat_nodes_map_t * /* compat_cache */,
1404 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1406 return IFN_LAST;
1409 /* Attempt to recognize a complex mul pattern. */
1411 vect_pattern*
1412 complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1413 slp_compat_nodes_map_t *ccache,
1414 slp_tree *node)
1416 auto_vec<slp_tree> ops;
1417 complex_operation_t op
1418 = vect_detect_pair_op (*node, true, &ops);
1419 internal_fn ifn = IFN_LAST;
1421 ifn = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops);
1422 if (ifn != IFN_LAST)
1423 return complex_fms_pattern::mkInstance (node, &ops, ifn);
1425 ifn = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops);
1426 if (ifn != IFN_LAST)
1427 return complex_mul_pattern::mkInstance (node, &ops, ifn);
1429 ifn = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops);
1430 if (ifn != IFN_LAST)
1431 return complex_add_pattern::mkInstance (node, &ops, ifn);
1433 return NULL;
1436 /* Dummy implementation of build. */
1438 void
1439 complex_operations_pattern::build (vec_info * /* vinfo */)
1441 gcc_unreachable ();
1445 /* The addsub_pattern. */
1447 class addsub_pattern : public vect_pattern
1449 public:
1450 addsub_pattern (slp_tree *node, internal_fn ifn)
1451 : vect_pattern (node, NULL, ifn) {};
1453 void build (vec_info *) final override;
1455 static vect_pattern*
1456 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1457 slp_tree *);
1460 vect_pattern *
1461 addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
1462 slp_compat_nodes_map_t *, slp_tree *node_)
1464 slp_tree node = *node_;
1465 if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
1466 || SLP_TREE_CHILDREN (node).length () != 2
1467 || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
1468 return NULL;
1470 /* Match a blend of a plus and a minus op with the same number of plus and
1471 minus lanes on the same operands. */
1472 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1473 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1474 if (l0 == l1)
1475 return NULL;
1476 bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1477 PLUS_EXPR);
1478 if (!l0add_p
1479 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1480 return NULL;
1481 bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1482 PLUS_EXPR);
1483 if (!l1add_p
1484 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
1485 return NULL;
1487 slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1488 slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1489 if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1490 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1491 || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1492 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
1493 return NULL;
1495 for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1497 std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
1498 /* It has to be alternating -, +, -,
1499 While we could permute the .ADDSUB inputs and the .ADDSUB output
1500 that's only profitable over the add + sub + blend if at least
1501 one of the permute is optimized which we can't determine here. */
1502 if (perm.first != ((i & 1) ? l1 : l0)
1503 || perm.second != i)
1504 return NULL;
1507 /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1508 (l0add_p), see whether we have FMA variants. We can only form FMAs
1509 if allowed via -ffp-contract=fast. */
1510 if (flag_fp_contract_mode != FP_CONTRACT_FAST
1511 && FLOAT_TYPE_P (SLP_TREE_VECTYPE (l0node)))
1513 else if (!l0add_p
1514 && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
1516 /* (c * d) -+ a */
1517 if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1518 return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1520 else if (l0add_p
1521 && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
1523 /* (c * d) +- a */
1524 if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1525 return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1528 if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1529 return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1531 return NULL;
1534 void
1535 addsub_pattern::build (vec_info *vinfo)
1537 slp_tree node = *m_node;
1539 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1540 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1542 switch (m_ifn)
1544 case IFN_VEC_ADDSUB:
1546 slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1547 slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1549 /* Modify the blend node in-place. */
1550 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1551 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1552 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1553 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1555 /* Build IFN_VEC_ADDSUB from the sub representative operands. */
1556 stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1557 gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1558 gimple_assign_rhs1 (rep->stmt),
1559 gimple_assign_rhs2 (rep->stmt));
1560 gimple_call_set_lhs (call, make_ssa_name
1561 (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1562 gimple_call_set_nothrow (call, true);
1563 gimple_set_bb (call, gimple_bb (rep->stmt));
1564 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
1565 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1566 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1567 STMT_SLP_TYPE (new_rep) = pure_slp;
1568 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1569 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1570 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
1571 SLP_TREE_CODE (node) = ERROR_MARK;
1572 SLP_TREE_LANE_PERMUTATION (node).release ();
1574 vect_free_slp_tree (sub);
1575 vect_free_slp_tree (add);
1576 break;
1578 case IFN_VEC_FMADDSUB:
1579 case IFN_VEC_FMSUBADD:
1581 slp_tree sub, add;
1582 if (m_ifn == IFN_VEC_FMADDSUB)
1584 sub = SLP_TREE_CHILDREN (node)[l0];
1585 add = SLP_TREE_CHILDREN (node)[l1];
1587 else /* m_ifn == IFN_VEC_FMSUBADD */
1589 sub = SLP_TREE_CHILDREN (node)[l1];
1590 add = SLP_TREE_CHILDREN (node)[l0];
1592 slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1593 /* Modify the blend node in-place. */
1594 SLP_TREE_CHILDREN (node).safe_grow (3, true);
1595 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1596 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1597 SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1598 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1599 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1600 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1602 /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */
1603 stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1604 stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1605 gcall *call = gimple_build_call_internal (m_ifn, 3,
1606 gimple_assign_rhs1 (mrep->stmt),
1607 gimple_assign_rhs2 (mrep->stmt),
1608 gimple_assign_rhs2 (srep->stmt));
1609 gimple_call_set_lhs (call, make_ssa_name
1610 (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
1611 gimple_call_set_nothrow (call, true);
1612 gimple_set_bb (call, gimple_bb (srep->stmt));
1613 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
1614 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1615 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1616 STMT_SLP_TYPE (new_rep) = pure_slp;
1617 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1618 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1619 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
1620 SLP_TREE_CODE (node) = ERROR_MARK;
1621 SLP_TREE_LANE_PERMUTATION (node).release ();
1623 vect_free_slp_tree (sub);
1624 vect_free_slp_tree (add);
1625 break;
1627 default:;
1631 /*******************************************************************************
1632 * Pattern matching definitions
1633 ******************************************************************************/
1635 #define SLP_PATTERN(x) &x::recognize
1636 vect_pattern_decl_t slp_patterns[]
1638 /* For least amount of back-tracking and more efficient matching
1639 order patterns from the largest to the smallest. Especially if they
1640 overlap in what they can detect. */
1642 SLP_PATTERN (complex_operations_pattern),
1643 SLP_PATTERN (addsub_pattern)
1645 #undef SLP_PATTERN
1647 /* Set the number of SLP pattern matchers available. */
1648 size_t num__slp_patterns = ARRAY_SIZE (slp_patterns);