Move PREFERRED_DEBUGGING_TYPE define in pa64-hpux.h to pa.h
[official-gcc.git] / gcc / tree-vect-slp-patterns.c
blobe08a15ebd92638ca32171b361412ec33f0116367
1 /* SLP - Pattern matcher on SLP trees
2 Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "gimple-iterator.h"
36 #include "cfgloop.h"
37 #include "tree-vectorizer.h"
38 #include "langhooks.h"
39 #include "gimple-walk.h"
40 #include "dbgcnt.h"
41 #include "tree-vector-builder.h"
42 #include "vec-perm-indices.h"
43 #include "gimple-fold.h"
44 #include "internal-fn.h"
46 /* SLP Pattern matching mechanism.
48 This extension to the SLP vectorizer allows one to transform the generated SLP
49 tree based on any pattern. The difference between this and the normal vect
50 pattern matcher is that unlike the former, this matcher allows you to match
51 with instructions that do not belong to the same SSA dominator graph.
53 The only requirement that this pattern matcher has is that you are only
54 only allowed to either match an entire group or none.
56 The pattern matcher currently only allows you to perform replacements to
57 internal functions.
59 Once the patterns are matched it is one way, these cannot be undone. It is
60 currently not supported to match patterns recursively.
62 To add a new pattern, implement the vect_pattern class and add the type to
63 slp_patterns.
67 /*******************************************************************************
68 * vect_pattern class
69 ******************************************************************************/
71 /* Default implementation of recognize that performs matching, validation and
72 replacement of nodes but that can be overriden if required. */
74 static bool
75 vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
77 tree vectype = SLP_TREE_VECTYPE (node);
78 if (ifn == IFN_LAST || !vectype)
79 return false;
81 if (dump_enabled_p ())
82 dump_printf_loc (MSG_NOTE, vect_location,
83 "Found %s pattern in SLP tree\n",
84 internal_fn_name (ifn));
86 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_NOTE, vect_location,
90 "Target supports %s vectorization with mode %T\n",
91 internal_fn_name (ifn), vectype);
93 else
95 if (dump_enabled_p ())
97 if (!vectype)
98 dump_printf_loc (MSG_NOTE, vect_location,
99 "Target does not support vector type for %T\n",
100 SLP_TREE_DEF_TYPE (node));
101 else
102 dump_printf_loc (MSG_NOTE, vect_location,
103 "Target does not support %s for vector type "
104 "%T\n", internal_fn_name (ifn), vectype);
106 return false;
108 return true;
111 /*******************************************************************************
112 * General helper types
113 ******************************************************************************/
115 /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116 be matched when looking for expressions that we are interested matching for
117 complex numbers addition and mla. */
119 typedef enum _complex_operation : unsigned {
120 PLUS_PLUS,
121 MINUS_PLUS,
122 PLUS_MINUS,
123 MULT_MULT,
124 CMPLX_NONE
125 } complex_operation_t;
127 /*******************************************************************************
128 * General helper functions
129 ******************************************************************************/
131 /* Helper function of linear_loads_p that checks to see if the load permutation
132 is sequential and in monotonically increasing order of loads with no gaps.
135 static inline complex_perm_kinds_t
136 is_linear_load_p (load_permutation_t loads)
138 if (loads.length() == 0)
139 return PERM_UNKNOWN;
141 unsigned load, i;
142 complex_perm_kinds_t candidates[4]
143 = { PERM_ODDODD
144 , PERM_EVENEVEN
145 , PERM_EVENODD
146 , PERM_ODDEVEN
149 int valid_patterns = 4;
150 FOR_EACH_VEC_ELT (loads, i, load)
152 if (candidates[0] != PERM_UNKNOWN && load != 1)
154 candidates[0] = PERM_UNKNOWN;
155 valid_patterns--;
157 if (candidates[1] != PERM_UNKNOWN && load != 0)
159 candidates[1] = PERM_UNKNOWN;
160 valid_patterns--;
162 if (candidates[2] != PERM_UNKNOWN && load != i)
164 candidates[2] = PERM_UNKNOWN;
165 valid_patterns--;
167 if (candidates[3] != PERM_UNKNOWN
168 && load != (i % 2 == 0 ? i + 1 : i - 1))
170 candidates[3] = PERM_UNKNOWN;
171 valid_patterns--;
174 if (valid_patterns == 0)
175 return PERM_UNKNOWN;
178 for (i = 0; i < sizeof(candidates); i++)
179 if (candidates[i] != PERM_UNKNOWN)
180 return candidates[i];
182 return PERM_UNKNOWN;
185 /* Combine complex_perm_kinds A and B into a new permute kind that describes the
186 resulting operation. */
188 static inline complex_perm_kinds_t
189 vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
191 if (a == b)
192 return a;
194 if (a == PERM_TOP)
195 return b;
197 if (b == PERM_TOP)
198 return a;
200 return PERM_UNKNOWN;
203 /* Check to see if all loads rooted in ROOT are linear. Linearity is
204 defined as having no gaps between values loaded. */
206 static complex_perm_kinds_t
207 linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
209 if (!root)
210 return PERM_UNKNOWN;
212 unsigned i;
213 complex_perm_kinds_t *tmp;
215 if ((tmp = perm_cache->get (root)) != NULL)
216 return *tmp;
218 complex_perm_kinds_t retval = PERM_UNKNOWN;
219 perm_cache->put (root, retval);
221 /* If it's a load node, then just read the load permute. */
222 if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
224 retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
225 perm_cache->put (root, retval);
226 return retval;
228 else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
230 retval = PERM_TOP;
231 perm_cache->put (root, retval);
232 return retval;
235 complex_perm_kinds_t kind = PERM_TOP;
237 slp_tree child;
238 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
240 complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
241 kind = vect_merge_perms (kind, res);
242 /* Unknown and Top are not valid on blends as they produce no permute. */
243 retval = kind;
244 if (kind == PERM_UNKNOWN || kind == PERM_TOP)
245 return retval;
248 retval = kind;
250 perm_cache->put (root, retval);
251 return retval;
255 /* This function attempts to make a node rooted in NODE is linear. If the node
256 if already linear than the node itself is returned in RESULT.
258 If the node is not linear then a new VEC_PERM_EXPR node is created with a
259 lane permute that when applied will make the node linear. If such a
260 permute cannot be created then FALSE is returned from the function.
262 Here linearity is defined as having a sequential, monotically increasing
263 load position inside the load permute generated by the loads reachable from
264 NODE. */
266 static slp_tree
267 vect_build_swap_evenodd_node (slp_tree node)
269 /* Attempt to linearise the permute. */
270 vec<std::pair<unsigned, unsigned> > zipped;
271 zipped.create (SLP_TREE_LANES (node));
273 for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
275 zipped.quick_push (std::make_pair (0, x+1));
276 zipped.quick_push (std::make_pair (0, x));
279 /* Create the new permute node and store it instead. */
280 slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
281 SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
282 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
283 SLP_TREE_CHILDREN (vnode).quick_push (node);
284 SLP_TREE_REF_COUNT (vnode) = 1;
285 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
286 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
287 SLP_TREE_REF_COUNT (node)++;
288 return vnode;
291 /* Checks to see of the expression represented by NODE is a gimple assign with
292 code CODE. */
294 static inline bool
295 vect_match_expression_p (slp_tree node, tree_code code)
297 if (!node
298 || !SLP_TREE_REPRESENTATIVE (node))
299 return false;
301 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
302 if (!is_gimple_assign (expr)
303 || gimple_assign_rhs_code (expr) != code)
304 return false;
306 return true;
309 /* Check if the given lane permute in PERMUTES matches an alternating sequence
310 of {even odd even odd ...}. This to account for unrolled loops. Further
311 mode there resulting permute must be linear. */
313 static inline bool
314 vect_check_evenodd_blend (lane_permutation_t &permutes,
315 unsigned even, unsigned odd)
317 if (permutes.length () == 0
318 || permutes.length () % 2 != 0)
319 return false;
321 unsigned val[2] = {even, odd};
322 unsigned seed = 0;
323 for (unsigned i = 0; i < permutes.length (); i++)
324 if (permutes[i].first != val[i % 2]
325 || permutes[i].second != seed++)
326 return false;
328 return true;
331 /* This function will match the two gimple expressions representing NODE1 and
332 NODE2 in parallel and returns the pair operation that represents the two
333 expressions in the two statements.
335 If match is successful then the corresponding complex_operation is
336 returned and the arguments to the two matched operations are returned in OPS.
338 If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
339 from the two nodes alternatingly.
341 If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
343 e.g. the following gimple statements
345 stmt 0 _39 = _37 + _12;
346 stmt 1 _6 = _38 - _36;
348 will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
351 static complex_operation_t
352 vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
353 bool two_operands = true, vec<slp_tree> *ops = NULL)
355 complex_operation_t result = CMPLX_NONE;
357 if (vect_match_expression_p (node1, MINUS_EXPR)
358 && vect_match_expression_p (node2, PLUS_EXPR)
359 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
360 result = MINUS_PLUS;
361 else if (vect_match_expression_p (node1, PLUS_EXPR)
362 && vect_match_expression_p (node2, MINUS_EXPR)
363 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
364 result = PLUS_MINUS;
365 else if (vect_match_expression_p (node1, PLUS_EXPR)
366 && vect_match_expression_p (node2, PLUS_EXPR))
367 result = PLUS_PLUS;
368 else if (vect_match_expression_p (node1, MULT_EXPR)
369 && vect_match_expression_p (node2, MULT_EXPR))
370 result = MULT_MULT;
372 if (result != CMPLX_NONE && ops != NULL)
374 if (two_operands)
376 auto l0node = SLP_TREE_CHILDREN (node1);
377 auto l1node = SLP_TREE_CHILDREN (node2);
379 /* Check if the tree is connected as we expect it. */
380 if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
381 || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
382 return CMPLX_NONE;
384 ops->safe_push (node1);
385 ops->safe_push (node2);
387 return result;
390 /* Overload of vect_detect_pair_op that matches against the representative
391 statements in the children of NODE. It is expected that NODE has exactly
392 two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
394 static complex_operation_t
395 vect_detect_pair_op (slp_tree node, bool two_operands = true,
396 vec<slp_tree> *ops = NULL)
398 if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
399 return CMPLX_NONE;
401 if (SLP_TREE_CHILDREN (node).length () != 2)
402 return CMPLX_NONE;
404 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
405 lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
407 return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
408 ops);
411 /*******************************************************************************
412 * complex_pattern class
413 ******************************************************************************/
415 /* SLP Complex Numbers pattern matching.
417 As an example, the following simple loop:
419 double a[restrict N]; double b[restrict N]; double c[restrict N];
421 for (int i=0; i < N; i+=2)
423 c[i] = a[i] - b[i+1];
424 c[i+1] = a[i+1] + b[i];
427 which represents a complex addition on with a rotation of 90* around the
428 argand plane. i.e. if `a` and `b` were complex numbers then this would be the
429 same as `a + (b * I)`.
431 Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
432 both recognized in order for the pattern to work. As an SLP tree this is
433 represented as
435 +--------------------------------+
436 | stmt 0 *_9 = _10; |
437 | stmt 1 *_15 = _16; |
438 +--------------------------------+
442 +--------------------------------+
443 | stmt 0 _10 = _4 - _8; |
444 | stmt 1 _16 = _12 + _14; |
445 | lane permutation { 0[0] 1[1] } |
446 +--------------------------------+
450 +-----+ | | +-----+
451 | | | | | |
452 +-----| { } |<-----+ +----->| { } --------+
453 | | | +------------------| | |
454 | +-----+ | +-----+ |
455 | | | |
456 | | | |
457 | +------|------------------+ |
458 | | | |
459 v v v v
460 +--------------------------+ +--------------------------------+
461 | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
462 | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
463 | load permutation { 1 0 } | | load permutation { 0 1 } |
464 +--------------------------+ +--------------------------------+
466 The pattern matcher allows you to replace both statements 0 and 1 or none at
467 all. Because this operation is a two operands operation the actual nodes
468 being replaced are those in the { } nodes. The actual scalar statements
469 themselves are not replaced or used during the matching but instead the
470 SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
471 replace and match on any number of nodes.
473 Because the pattern matcher matches on the representative statement for the
474 SLP node the case of two_operators it allows you to match the children of the
475 node. This is done using the method `recognize ()`.
479 /* The complex_pattern class contains common code for pattern matchers that work
480 on complex numbers. These provide functionality to allow de-construction and
481 validation of sequences depicting/transforming REAL and IMAG pairs. */
483 class complex_pattern : public vect_pattern
485 protected:
486 auto_vec<slp_tree> m_workset;
487 complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
488 : vect_pattern (node, m_ops, ifn)
490 this->m_workset.safe_push (*node);
493 public:
494 void build (vec_info *);
496 static internal_fn
497 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
498 vec<slp_tree> *);
501 /* Create a replacement pattern statement for each node in m_node and inserts
502 the new statement into m_node as the new representative statement. The old
503 statement is marked as being in a pattern defined by the new statement. The
504 statement is created as call to internal function IFN with m_num_args
505 arguments.
507 Futhermore the new pattern is also added to the vectorization information
508 structure VINFO and the old statement STMT_INFO is marked as unused while
509 the new statement is marked as used and the number of SLP uses of the new
510 statement is incremented.
512 The newly created SLP nodes are marked as SLP only and will be dissolved
513 if SLP is aborted.
515 The newly created gimple call is returned and the BB remains unchanged.
517 This default method is designed to only match against simple operands where
518 all the input and output types are the same.
521 void
522 complex_pattern::build (vec_info *vinfo)
524 stmt_vec_info stmt_info;
526 auto_vec<tree> args;
527 args.create (this->m_num_args);
528 args.quick_grow_cleared (this->m_num_args);
529 slp_tree node;
530 unsigned ix;
531 stmt_vec_info call_stmt_info;
532 gcall *call_stmt = NULL;
534 /* Now modify the nodes themselves. */
535 FOR_EACH_VEC_ELT (this->m_workset, ix, node)
537 /* Calculate the location of the statement in NODE to replace. */
538 stmt_info = SLP_TREE_REPRESENTATIVE (node);
539 stmt_vec_info reduc_def
540 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
541 gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
542 tree lhs_old_stmt = gimple_get_lhs (old_stmt);
543 tree type = TREE_TYPE (lhs_old_stmt);
545 /* Create the argument set for use by gimple_build_call_internal_vec. */
546 for (unsigned i = 0; i < this->m_num_args; i++)
547 args[i] = lhs_old_stmt;
549 /* Create the new pattern statements. */
550 call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
551 tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
552 gimple_call_set_lhs (call_stmt, var);
553 gimple_set_location (call_stmt, gimple_location (old_stmt));
554 gimple_call_set_nothrow (call_stmt, true);
556 /* Adjust the book-keeping for the new and old statements for use during
557 SLP. This is required to get the right VF and statement during SLP
558 analysis. These changes are created after relevancy has been set for
559 the nodes as such we need to manually update them. Any changes will be
560 undone if SLP is cancelled. */
561 call_stmt_info
562 = vinfo->add_pattern_stmt (call_stmt, stmt_info);
564 /* Make sure to mark the representative statement pure_slp and
565 relevant and transfer reduction info. */
566 STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
567 STMT_SLP_TYPE (call_stmt_info) = pure_slp;
568 STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
570 gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
571 STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
572 STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
574 /* Since we are replacing all the statements in the group with the same
575 thing it doesn't really matter. So just set it every time a new stmt
576 is created. */
577 SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
578 SLP_TREE_LANE_PERMUTATION (node).release ();
579 SLP_TREE_CODE (node) = CALL_EXPR;
583 /*******************************************************************************
584 * complex_add_pattern class
585 ******************************************************************************/
587 class complex_add_pattern : public complex_pattern
589 protected:
590 complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
591 : complex_pattern (node, m_ops, ifn)
593 this->m_num_args = 2;
596 public:
597 void build (vec_info *);
598 static internal_fn
599 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
600 vec<slp_tree> *);
602 static vect_pattern*
603 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
605 static vect_pattern*
606 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
608 return new complex_add_pattern (node, m_ops, ifn);
612 /* Perform a replacement of the detected complex add pattern with the new
613 instruction sequences. */
615 void
616 complex_add_pattern::build (vec_info *vinfo)
618 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
620 slp_tree node = this->m_ops[0];
621 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
623 /* First re-arrange the children. */
624 SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
625 SLP_TREE_CHILDREN (*this->m_node)[1] =
626 vect_build_swap_evenodd_node (children[1]);
628 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
629 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
630 vect_free_slp_tree (this->m_ops[0]);
631 vect_free_slp_tree (this->m_ops[1]);
633 complex_pattern::build (vinfo);
636 /* Pattern matcher for trying to match complex addition pattern in SLP tree.
638 If no match is found then IFN is set to IFN_LAST.
639 This function matches the patterns shaped as:
641 c[i] = a[i] - b[i+1];
642 c[i+1] = a[i+1] + b[i];
644 If a match occurred then TRUE is returned, else FALSE. The initial match is
645 expected to be in OP1 and the initial match operands in args0. */
647 internal_fn
648 complex_add_pattern::matches (complex_operation_t op,
649 slp_tree_to_load_perm_map_t *perm_cache,
650 slp_tree *node, vec<slp_tree> *ops)
652 internal_fn ifn = IFN_LAST;
654 /* Find the two components. Rotation in the complex plane will modify
655 the operations:
657 * Rotation 0: + +
658 * Rotation 90: - +
659 * Rotation 180: - -
660 * Rotation 270: + -
662 Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
663 to care about them here. */
664 if (op == MINUS_PLUS)
665 ifn = IFN_COMPLEX_ADD_ROT90;
666 else if (op == PLUS_MINUS)
667 ifn = IFN_COMPLEX_ADD_ROT270;
668 else
669 return ifn;
671 /* verify that there is a permute, otherwise this isn't a pattern we
672 we support. */
673 gcc_assert (ops->length () == 2);
675 vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
677 /* First node must be unpermuted. */
678 if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
679 return IFN_LAST;
681 /* Second node must be permuted. */
682 if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
683 return IFN_LAST;
685 if (!vect_pattern_validate_optab (ifn, *node))
686 return IFN_LAST;
688 return ifn;
691 /* Attempt to recognize a complex add pattern. */
693 vect_pattern*
694 complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
695 slp_tree *node)
697 auto_vec<slp_tree> ops;
698 complex_operation_t op
699 = vect_detect_pair_op (*node, true, &ops);
700 internal_fn ifn
701 = complex_add_pattern::matches (op, perm_cache, node, &ops);
702 if (ifn == IFN_LAST)
703 return NULL;
705 return new complex_add_pattern (node, &ops, ifn);
708 /*******************************************************************************
709 * complex_mul_pattern
710 ******************************************************************************/
712 /* Check to see if either of the trees in ARGS are a NEGATE_EXPR. If the first
713 child (args[0]) is a NEGATE_EXPR then NEG_FIRST_P is set to TRUE.
715 If a negate is found then the values in ARGS are reordered such that the
716 negate node is always the second one and the entry is replaced by the child
717 of the negate node. */
719 static inline bool
720 vect_normalize_conj_loc (vec<slp_tree> &args, bool *neg_first_p = NULL)
722 gcc_assert (args.length () == 2);
723 bool neg_found = false;
725 if (vect_match_expression_p (args[0], NEGATE_EXPR))
727 std::swap (args[0], args[1]);
728 neg_found = true;
729 if (neg_first_p)
730 *neg_first_p = true;
732 else if (vect_match_expression_p (args[1], NEGATE_EXPR))
734 neg_found = true;
735 if (neg_first_p)
736 *neg_first_p = false;
739 if (neg_found)
740 args[1] = SLP_TREE_CHILDREN (args[1])[0];
742 return neg_found;
745 /* Helper function to check if PERM is KIND or PERM_TOP. */
747 static inline bool
748 is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind)
750 return perm == kind || perm == PERM_TOP;
753 /* Helper function that checks to see if LEFT_OP and RIGHT_OP are both MULT_EXPR
754 nodes but also that they represent an operation that is either a complex
755 multiplication or a complex multiplication by conjugated value.
757 Of the negation is expected to be in the first half of the tree (As required
758 by an FMS pattern) then NEG_FIRST is true. If the operation is a conjugate
759 operation then CONJ_FIRST_OPERAND is set to indicate whether the first or
760 second operand contains the conjugate operation. */
762 static inline bool
763 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
764 const vec<slp_tree> &left_op,
765 const vec<slp_tree> &right_op,
766 bool neg_first, bool *conj_first_operand,
767 bool fms)
769 /* The presence of a negation indicates that we have either a conjugate or a
770 rotation. We need to distinguish which one. */
771 *conj_first_operand = false;
772 complex_perm_kinds_t kind;
774 /* Complex conjugates have the negation on the imaginary part of the
775 number where rotations affect the real component. So check if the
776 negation is on a dup of lane 1. */
777 if (fms)
779 /* Canonicalization for fms is not consistent. So have to test both
780 variants to be sure. This needs to be fixed in the mid-end so
781 this part can be simpler. */
782 kind = linear_loads_p (perm_cache, right_op[0]);
783 if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), PERM_ODDODD)
784 && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
785 PERM_ODDEVEN))
786 || (kind == PERM_ODDEVEN
787 && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
788 PERM_ODDODD))))
789 return false;
791 else
793 if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD
794 && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]),
795 PERM_ODDEVEN))
796 return false;
799 /* Deal with differences in indexes. */
800 int index1 = fms ? 1 : 0;
801 int index2 = fms ? 0 : 1;
803 /* Check if the conjugate is on the second first or second operand. The
804 order of the node with the conjugate value determines this, and the dup
805 node must be one of lane 0 of the same DR as the neg node. */
806 kind = linear_loads_p (perm_cache, left_op[index1]);
807 if (kind == PERM_TOP)
809 if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD)
810 return true;
812 else if (kind == PERM_EVENODD)
814 if ((kind = linear_loads_p (perm_cache, left_op[index2])) == PERM_EVENODD)
815 return false;
816 return true;
818 else if (!neg_first)
819 *conj_first_operand = true;
820 else
821 return false;
823 if (kind != PERM_EVENEVEN)
824 return false;
826 return true;
829 /* Helper function to help distinguish between a conjugate and a rotation in a
830 complex multiplication. The operations have similar shapes but the order of
831 the load permutes are different. This function returns TRUE when the order
832 is consistent with a multiplication or multiplication by conjugated
833 operand but returns FALSE if it's a multiplication by rotated operand. */
835 static inline bool
836 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
837 const vec<slp_tree> &op,
838 complex_perm_kinds_t permKind)
840 /* The left node is the more common case, test it first. */
841 if (!is_eq_or_top (linear_loads_p (perm_cache, op[0]), permKind))
843 if (!is_eq_or_top (linear_loads_p (perm_cache, op[1]), permKind))
844 return false;
846 return true;
849 /* This function combines two nodes containing only even and only odd lanes
850 together into a single node which contains the nodes in even/odd order
851 by using a lane permute.
853 The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
854 So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
856 The tree REPRESENTATION is taken from the supplied REP along with the
857 vectype which must be the same between all three nodes.
860 static slp_tree
861 vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
863 vec<std::pair<unsigned, unsigned> > perm;
864 perm.create (SLP_TREE_LANES (rep));
866 for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
868 perm.quick_push (std::make_pair (0, x));
869 perm.quick_push (std::make_pair (1, x+1));
872 slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
873 SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
874 SLP_TREE_LANE_PERMUTATION (vnode) = perm;
876 SLP_TREE_CHILDREN (vnode).create (2);
877 SLP_TREE_CHILDREN (vnode).quick_push (even);
878 SLP_TREE_CHILDREN (vnode).quick_push (odd);
879 SLP_TREE_REF_COUNT (even)++;
880 SLP_TREE_REF_COUNT (odd)++;
881 SLP_TREE_REF_COUNT (vnode) = 1;
883 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
884 gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
885 /* Representation is set to that of the current node as the vectorizer
886 can't deal with VEC_PERMs with no representation, as would be the
887 case with invariants. */
888 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
889 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
890 return vnode;
893 class complex_mul_pattern : public complex_pattern
895 protected:
896 complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
897 : complex_pattern (node, m_ops, ifn)
899 this->m_num_args = 2;
902 public:
903 void build (vec_info *);
904 static internal_fn
905 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
906 vec<slp_tree> *);
908 static vect_pattern*
909 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
911 static vect_pattern*
912 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
914 return new complex_mul_pattern (node, m_ops, ifn);
919 /* Pattern matcher for trying to match complex multiply and complex multiply
920 and accumulate pattern in SLP tree. If the operation matches then IFN
921 is set to the operation it matched and the arguments to the two
922 replacement statements are put in m_ops.
924 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
926 This function matches the patterns shaped as:
928 double ax = (b[i+1] * a[i]);
929 double bx = (a[i+1] * b[i]);
931 c[i] = c[i] - ax;
932 c[i+1] = c[i+1] + bx;
934 If a match occurred then TRUE is returned, else FALSE. The initial match is
935 expected to be in OP1 and the initial match operands in args0. */
937 internal_fn
938 complex_mul_pattern::matches (complex_operation_t op,
939 slp_tree_to_load_perm_map_t *perm_cache,
940 slp_tree *node, vec<slp_tree> *ops)
942 internal_fn ifn = IFN_LAST;
944 if (op != MINUS_PLUS)
945 return IFN_LAST;
947 auto childs = *ops;
948 auto l0node = SLP_TREE_CHILDREN (childs[0]);
949 auto l1node = SLP_TREE_CHILDREN (childs[1]);
951 bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
952 bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
953 if (!mul0 && !mul1)
954 return IFN_LAST;
956 /* Now operand2+4 may lead to another expression. */
957 auto_vec<slp_tree> left_op, right_op;
958 slp_tree add0 = NULL;
960 /* Check if we may be a multiply add. */
961 if (!mul0
962 && vect_match_expression_p (l0node[0], PLUS_EXPR))
964 auto vals = SLP_TREE_CHILDREN (l0node[0]);
965 /* Check if it's a multiply, otherwise no idea what this is. */
966 if (!vect_match_expression_p (vals[1], MULT_EXPR))
967 return IFN_LAST;
969 /* Check if the ADD is linear, otherwise it's not valid complex FMA. */
970 if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
971 return IFN_LAST;
973 left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
974 add0 = vals[0];
976 else
977 left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
979 right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
981 if (left_op.length () != 2
982 || right_op.length () != 2
983 || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
984 return IFN_LAST;
986 bool neg_first = false;
987 bool conj_first_operand = false;
988 bool is_neg = vect_normalize_conj_loc (right_op, &neg_first);
990 if (!is_neg)
992 /* A multiplication needs to multiply agains the real pair, otherwise
993 the pattern matches that of FMS. */
994 if (!vect_validate_multiplication (perm_cache, left_op, PERM_EVENEVEN)
995 || vect_normalize_conj_loc (left_op))
996 return IFN_LAST;
997 if (!mul0)
998 ifn = IFN_COMPLEX_FMA;
999 else
1000 ifn = IFN_COMPLEX_MUL;
1002 else
1004 if (!vect_validate_multiplication (perm_cache, left_op, right_op,
1005 neg_first, &conj_first_operand,
1006 false))
1007 return IFN_LAST;
1009 if(!mul0)
1010 ifn = IFN_COMPLEX_FMA_CONJ;
1011 else
1012 ifn = IFN_COMPLEX_MUL_CONJ;
1015 if (!vect_pattern_validate_optab (ifn, *node))
1016 return IFN_LAST;
1018 ops->truncate (0);
1019 ops->create (add0 ? 4 : 3);
1021 if (add0)
1022 ops->quick_push (add0);
1024 complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
1025 if (kind == PERM_EVENODD)
1027 ops->quick_push (left_op[1]);
1028 ops->quick_push (right_op[1]);
1029 ops->quick_push (left_op[0]);
1031 else if (kind == PERM_TOP)
1033 ops->quick_push (left_op[1]);
1034 ops->quick_push (right_op[1]);
1035 ops->quick_push (left_op[0]);
1037 else if (kind == PERM_EVENEVEN && !conj_first_operand)
1039 ops->quick_push (left_op[0]);
1040 ops->quick_push (right_op[0]);
1041 ops->quick_push (left_op[1]);
1043 else
1045 ops->quick_push (left_op[0]);
1046 ops->quick_push (right_op[1]);
1047 ops->quick_push (left_op[1]);
1050 return ifn;
1053 /* Attempt to recognize a complex mul pattern. */
1055 vect_pattern*
1056 complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1057 slp_tree *node)
1059 auto_vec<slp_tree> ops;
1060 complex_operation_t op
1061 = vect_detect_pair_op (*node, true, &ops);
1062 internal_fn ifn
1063 = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1064 if (ifn == IFN_LAST)
1065 return NULL;
1067 return new complex_mul_pattern (node, &ops, ifn);
1070 /* Perform a replacement of the detected complex mul pattern with the new
1071 instruction sequences. */
1073 void
1074 complex_mul_pattern::build (vec_info *vinfo)
1076 slp_tree node;
1077 unsigned i;
1078 switch (this->m_ifn)
1080 case IFN_COMPLEX_MUL:
1081 case IFN_COMPLEX_MUL_CONJ:
1083 slp_tree newnode
1084 = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
1085 *this->m_node);
1086 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1088 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1089 vect_free_slp_tree (node);
1091 /* First re-arrange the children. */
1092 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1093 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1094 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1095 break;
1097 case IFN_COMPLEX_FMA:
1098 case IFN_COMPLEX_FMA_CONJ:
1100 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1101 slp_tree newnode
1102 = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
1103 *this->m_node);
1104 SLP_TREE_REF_COUNT (this->m_ops[3])++;
1106 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1107 vect_free_slp_tree (node);
1109 /* First re-arrange the children. */
1110 SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
1111 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[0];
1112 SLP_TREE_CHILDREN (*this->m_node)[1] = this->m_ops[3];
1113 SLP_TREE_CHILDREN (*this->m_node)[2] = newnode;
1115 /* Tell the builder to expect an extra argument. */
1116 this->m_num_args++;
1117 break;
1119 default:
1120 gcc_unreachable ();
1123 /* And then rewrite the node itself. */
1124 complex_pattern::build (vinfo);
1127 /*******************************************************************************
1128 * complex_fms_pattern class
1129 ******************************************************************************/
1131 class complex_fms_pattern : public complex_pattern
1133 protected:
1134 complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1135 : complex_pattern (node, m_ops, ifn)
1137 this->m_num_args = 3;
1140 public:
1141 void build (vec_info *);
1142 static internal_fn
1143 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1144 vec<slp_tree> *);
1146 static vect_pattern*
1147 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1149 static vect_pattern*
1150 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1152 return new complex_fms_pattern (node, m_ops, ifn);
1157 /* Pattern matcher for trying to match complex multiply and subtract pattern
1158 in SLP tree. If the operation matches then IFN is set to the operation
1159 it matched and the arguments to the two replacement statements are put in
1160 m_ops.
1162 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1164 This function matches the patterns shaped as:
1166 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1167 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1169 c[i] = c[i] - ax;
1170 c[i+1] = c[i+1] + bx;
1172 If a match occurred then TRUE is returned, else FALSE. The initial match is
1173 expected to be in OP1 and the initial match operands in args0. */
1175 internal_fn
1176 complex_fms_pattern::matches (complex_operation_t op,
1177 slp_tree_to_load_perm_map_t *perm_cache,
1178 slp_tree * ref_node, vec<slp_tree> *ops)
1180 internal_fn ifn = IFN_LAST;
1182 /* We need to ignore the two_operands nodes that may also match,
1183 for that we can check if they have any scalar statements and also
1184 check that it's not a permute node as we're looking for a normal
1185 MINUS_EXPR operation. */
1186 if (op != CMPLX_NONE)
1187 return IFN_LAST;
1189 slp_tree root = *ref_node;
1190 if (!vect_match_expression_p (root, MINUS_EXPR))
1191 return IFN_LAST;
1193 auto nodes = SLP_TREE_CHILDREN (root);
1194 if (!vect_match_expression_p (nodes[1], MULT_EXPR)
1195 || vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
1196 return IFN_LAST;
1198 auto childs = SLP_TREE_CHILDREN (nodes[0]);
1199 auto l0node = SLP_TREE_CHILDREN (childs[0]);
1201 /* Now operand2+4 may lead to another expression. */
1202 auto_vec<slp_tree> left_op, right_op;
1203 left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1204 right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
1206 /* If these nodes don't have any children then they're
1207 not ones we're interested in. */
1208 if (left_op.length () != 2 || right_op.length () != 2)
1209 return IFN_LAST;
1211 bool is_neg = vect_normalize_conj_loc (left_op);
1213 bool conj_first_operand = false;
1214 if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
1215 &conj_first_operand, true))
1216 return IFN_LAST;
1218 if (!is_neg)
1219 ifn = IFN_COMPLEX_FMS;
1220 else if (is_neg)
1221 ifn = IFN_COMPLEX_FMS_CONJ;
1223 if (!vect_pattern_validate_optab (ifn, *ref_node))
1224 return IFN_LAST;
1226 ops->truncate (0);
1227 ops->create (4);
1229 complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
1230 if (kind == PERM_EVENODD)
1232 ops->quick_push (l0node[0]);
1233 ops->quick_push (right_op[0]);
1234 ops->quick_push (right_op[1]);
1235 ops->quick_push (left_op[1]);
1237 else if (kind == PERM_TOP)
1239 ops->quick_push (l0node[0]);
1240 ops->quick_push (right_op[1]);
1241 ops->quick_push (right_op[0]);
1242 ops->quick_push (left_op[0]);
1244 else if (kind == PERM_EVENEVEN && !is_neg)
1246 ops->quick_push (l0node[0]);
1247 ops->quick_push (right_op[1]);
1248 ops->quick_push (right_op[0]);
1249 ops->quick_push (left_op[0]);
1251 else
1253 ops->quick_push (l0node[0]);
1254 ops->quick_push (right_op[1]);
1255 ops->quick_push (right_op[0]);
1256 ops->quick_push (left_op[1]);
1259 return ifn;
1262 /* Attempt to recognize a complex mul pattern. */
1264 vect_pattern*
1265 complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1266 slp_tree *node)
1268 auto_vec<slp_tree> ops;
1269 complex_operation_t op
1270 = vect_detect_pair_op (*node, true, &ops);
1271 internal_fn ifn
1272 = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1273 if (ifn == IFN_LAST)
1274 return NULL;
1276 return new complex_fms_pattern (node, &ops, ifn);
1279 /* Perform a replacement of the detected complex mul pattern with the new
1280 instruction sequences. */
1282 void
1283 complex_fms_pattern::build (vec_info *vinfo)
1285 slp_tree node;
1286 unsigned i;
1287 slp_tree newnode =
1288 vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1289 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1290 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1292 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1293 vect_free_slp_tree (node);
1295 SLP_TREE_CHILDREN (*this->m_node).release ();
1296 SLP_TREE_CHILDREN (*this->m_node).create (3);
1298 /* First re-arrange the children. */
1299 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1300 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1301 SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1303 /* And then rewrite the node itself. */
1304 complex_pattern::build (vinfo);
1307 /*******************************************************************************
1308 * complex_operations_pattern class
1309 ******************************************************************************/
1311 /* This function combines all the existing pattern matchers above into one class
1312 that shares the functionality between them. The initial match is shared
1313 between all complex operations. */
1315 class complex_operations_pattern : public complex_pattern
1317 protected:
1318 complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1319 internal_fn ifn)
1320 : complex_pattern (node, m_ops, ifn)
1322 this->m_num_args = 0;
1325 public:
1326 void build (vec_info *);
1327 static internal_fn
1328 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1329 vec<slp_tree> *);
1331 static vect_pattern*
1332 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1335 /* Dummy matches implementation for proxy object. */
1337 internal_fn
1338 complex_operations_pattern::
1339 matches (complex_operation_t /* op */,
1340 slp_tree_to_load_perm_map_t * /* perm_cache */,
1341 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1343 return IFN_LAST;
1346 /* Attempt to recognize a complex mul pattern. */
1348 vect_pattern*
1349 complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1350 slp_tree *node)
1352 auto_vec<slp_tree> ops;
1353 complex_operation_t op
1354 = vect_detect_pair_op (*node, true, &ops);
1355 internal_fn ifn = IFN_LAST;
1357 ifn = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1358 if (ifn != IFN_LAST)
1359 return complex_fms_pattern::mkInstance (node, &ops, ifn);
1361 ifn = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1362 if (ifn != IFN_LAST)
1363 return complex_mul_pattern::mkInstance (node, &ops, ifn);
1365 ifn = complex_add_pattern::matches (op, perm_cache, node, &ops);
1366 if (ifn != IFN_LAST)
1367 return complex_add_pattern::mkInstance (node, &ops, ifn);
1369 return NULL;
1372 /* Dummy implementation of build. */
1374 void
1375 complex_operations_pattern::build (vec_info * /* vinfo */)
1377 gcc_unreachable ();
1381 /* The addsub_pattern. */
1383 class addsub_pattern : public vect_pattern
1385 public:
1386 addsub_pattern (slp_tree *node, internal_fn ifn)
1387 : vect_pattern (node, NULL, ifn) {};
1389 void build (vec_info *);
1391 static vect_pattern*
1392 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1395 vect_pattern *
1396 addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_)
1398 slp_tree node = *node_;
1399 if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
1400 || SLP_TREE_CHILDREN (node).length () != 2
1401 || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
1402 return NULL;
1404 /* Match a blend of a plus and a minus op with the same number of plus and
1405 minus lanes on the same operands. */
1406 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1407 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1408 if (l0 == l1)
1409 return NULL;
1410 bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1411 PLUS_EXPR);
1412 if (!l0add_p
1413 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1414 return NULL;
1415 bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1416 PLUS_EXPR);
1417 if (!l1add_p
1418 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
1419 return NULL;
1421 slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1422 slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1423 if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1424 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1425 || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1426 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
1427 return NULL;
1429 for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1431 std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
1432 /* It has to be alternating -, +, -,
1433 While we could permute the .ADDSUB inputs and the .ADDSUB output
1434 that's only profitable over the add + sub + blend if at least
1435 one of the permute is optimized which we can't determine here. */
1436 if (perm.first != ((i & 1) ? l1 : l0)
1437 || perm.second != i)
1438 return NULL;
1441 /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1442 (l0add_p), see whether we have FMA variants. */
1443 if (!l0add_p
1444 && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
1446 /* (c * d) -+ a */
1447 if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1448 return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1450 else if (l0add_p
1451 && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
1453 /* (c * d) +- a */
1454 if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1455 return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1458 if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1459 return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1461 return NULL;
1464 void
1465 addsub_pattern::build (vec_info *vinfo)
1467 slp_tree node = *m_node;
1469 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1470 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1472 switch (m_ifn)
1474 case IFN_VEC_ADDSUB:
1476 slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1477 slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1479 /* Modify the blend node in-place. */
1480 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1481 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1482 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1483 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1485 /* Build IFN_VEC_ADDSUB from the sub representative operands. */
1486 stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1487 gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1488 gimple_assign_rhs1 (rep->stmt),
1489 gimple_assign_rhs2 (rep->stmt));
1490 gimple_call_set_lhs (call, make_ssa_name
1491 (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1492 gimple_call_set_nothrow (call, true);
1493 gimple_set_bb (call, gimple_bb (rep->stmt));
1494 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
1495 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1496 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1497 STMT_SLP_TYPE (new_rep) = pure_slp;
1498 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1499 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1500 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
1501 SLP_TREE_CODE (node) = ERROR_MARK;
1502 SLP_TREE_LANE_PERMUTATION (node).release ();
1504 vect_free_slp_tree (sub);
1505 vect_free_slp_tree (add);
1506 break;
1508 case IFN_VEC_FMADDSUB:
1509 case IFN_VEC_FMSUBADD:
1511 slp_tree sub, add;
1512 if (m_ifn == IFN_VEC_FMADDSUB)
1514 sub = SLP_TREE_CHILDREN (node)[l0];
1515 add = SLP_TREE_CHILDREN (node)[l1];
1517 else /* m_ifn == IFN_VEC_FMSUBADD */
1519 sub = SLP_TREE_CHILDREN (node)[l1];
1520 add = SLP_TREE_CHILDREN (node)[l0];
1522 slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1523 /* Modify the blend node in-place. */
1524 SLP_TREE_CHILDREN (node).safe_grow (3, true);
1525 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1526 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1527 SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1528 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1529 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1530 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1532 /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */
1533 stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1534 stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1535 gcall *call = gimple_build_call_internal (m_ifn, 3,
1536 gimple_assign_rhs1 (mrep->stmt),
1537 gimple_assign_rhs2 (mrep->stmt),
1538 gimple_assign_rhs2 (srep->stmt));
1539 gimple_call_set_lhs (call, make_ssa_name
1540 (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
1541 gimple_call_set_nothrow (call, true);
1542 gimple_set_bb (call, gimple_bb (srep->stmt));
1543 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
1544 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1545 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1546 STMT_SLP_TYPE (new_rep) = pure_slp;
1547 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1548 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1549 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
1550 SLP_TREE_CODE (node) = ERROR_MARK;
1551 SLP_TREE_LANE_PERMUTATION (node).release ();
1553 vect_free_slp_tree (sub);
1554 vect_free_slp_tree (add);
1555 break;
1557 default:;
1561 /*******************************************************************************
1562 * Pattern matching definitions
1563 ******************************************************************************/
1565 #define SLP_PATTERN(x) &x::recognize
1566 vect_pattern_decl_t slp_patterns[]
1568 /* For least amount of back-tracking and more efficient matching
1569 order patterns from the largest to the smallest. Especially if they
1570 overlap in what they can detect. */
1572 SLP_PATTERN (complex_operations_pattern),
1573 SLP_PATTERN (addsub_pattern)
1575 #undef SLP_PATTERN
1577 /* Set the number of SLP pattern matchers available. */
1578 size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t);