/* Helpers for evaluating polynomials with various schemes - specific to SVE
   but precision-agnostic.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
22 # error Cannot use poly_generic without defining VTYPE
25 # error Cannot use poly_generic without defining STYPE
28 # error Cannot use poly_generic without defining VWRAP
31 # error Cannot use poly_generic without defining DUP
34 static inline VTYPE
VWRAP (pairwise_poly_3
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
37 /* At order 3, Estrin and Pairwise Horner are identical. */
38 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
39 VTYPE p23
= svmla_x (pg
, DUP (poly
[2]), x
, poly
[3]);
40 return svmla_x (pg
, p01
, p23
, x2
);
43 static inline VTYPE
VWRAP (estrin_4
) (svbool_t pg
, VTYPE x
, VTYPE x2
, VTYPE x4
,
46 VTYPE p03
= VWRAP (pairwise_poly_3
) (pg
, x
, x2
, poly
);
47 return svmla_x (pg
, p03
, x4
, poly
[4]);
49 static inline VTYPE
VWRAP (estrin_5
) (svbool_t pg
, VTYPE x
, VTYPE x2
, VTYPE x4
,
52 VTYPE p03
= VWRAP (pairwise_poly_3
) (pg
, x
, x2
, poly
);
53 VTYPE p45
= svmla_x (pg
, DUP (poly
[4]), x
, poly
[5]);
54 return svmla_x (pg
, p03
, p45
, x4
);
56 static inline VTYPE
VWRAP (estrin_6
) (svbool_t pg
, VTYPE x
, VTYPE x2
, VTYPE x4
,
59 VTYPE p03
= VWRAP (pairwise_poly_3
) (pg
, x
, x2
, poly
);
60 VTYPE p45
= svmla_x (pg
, DUP (poly
[4]), x
, poly
[5]);
61 VTYPE p46
= svmla_x (pg
, p45
, x
, poly
[6]);
62 return svmla_x (pg
, p03
, p46
, x4
);
64 static inline VTYPE
VWRAP (estrin_7
) (svbool_t pg
, VTYPE x
, VTYPE x2
, VTYPE x4
,
67 VTYPE p03
= VWRAP (pairwise_poly_3
) (pg
, x
, x2
, poly
);
68 VTYPE p47
= VWRAP (pairwise_poly_3
) (pg
, x
, x2
, poly
+ 4);
69 return svmla_x (pg
, p03
, p47
, x4
);
71 static inline VTYPE
VWRAP (estrin_8
) (svbool_t pg
, VTYPE x
, VTYPE x2
, VTYPE x4
,
72 VTYPE x8
, const STYPE
*poly
)
74 return svmla_x (pg
, VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
), x8
, poly
[8]);
76 static inline VTYPE
VWRAP (estrin_9
) (svbool_t pg
, VTYPE x
, VTYPE x2
, VTYPE x4
,
77 VTYPE x8
, const STYPE
*poly
)
79 VTYPE p89
= svmla_x (pg
, DUP (poly
[8]), x
, poly
[9]);
80 return svmla_x (pg
, VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
), p89
, x8
);
82 static inline VTYPE
VWRAP (estrin_10
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
83 VTYPE x4
, VTYPE x8
, const STYPE
*poly
)
85 VTYPE p89
= svmla_x (pg
, DUP (poly
[8]), x
, poly
[9]);
86 VTYPE p8_10
= svmla_x (pg
, p89
, x2
, poly
[10]);
87 return svmla_x (pg
, VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
), p8_10
, x8
);
89 static inline VTYPE
VWRAP (estrin_11
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
90 VTYPE x4
, VTYPE x8
, const STYPE
*poly
)
92 VTYPE p8_11
= VWRAP (pairwise_poly_3
) (pg
, x
, x2
, poly
+ 8);
93 return svmla_x (pg
, VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
), p8_11
, x8
);
95 static inline VTYPE
VWRAP (estrin_12
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
96 VTYPE x4
, VTYPE x8
, const STYPE
*poly
)
98 return svmla_x (pg
, VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
),
99 VWRAP (estrin_4
) (pg
, x
, x2
, x4
, poly
+ 8), x8
);
101 static inline VTYPE
VWRAP (estrin_13
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
102 VTYPE x4
, VTYPE x8
, const STYPE
*poly
)
104 return svmla_x (pg
, VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
),
105 VWRAP (estrin_5
) (pg
, x
, x2
, x4
, poly
+ 8), x8
);
107 static inline VTYPE
VWRAP (estrin_14
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
108 VTYPE x4
, VTYPE x8
, const STYPE
*poly
)
110 return svmla_x (pg
, VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
),
111 VWRAP (estrin_6
) (pg
, x
, x2
, x4
, poly
+ 8), x8
);
113 static inline VTYPE
VWRAP (estrin_15
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
114 VTYPE x4
, VTYPE x8
, const STYPE
*poly
)
116 return svmla_x (pg
, VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
),
117 VWRAP (estrin_7
) (pg
, x
, x2
, x4
, poly
+ 8), x8
);
119 static inline VTYPE
VWRAP (estrin_16
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
120 VTYPE x4
, VTYPE x8
, VTYPE x16
,
123 return svmla_x (pg
, VWRAP (estrin_15
) (pg
, x
, x2
, x4
, x8
, poly
), x16
,
126 static inline VTYPE
VWRAP (estrin_17
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
127 VTYPE x4
, VTYPE x8
, VTYPE x16
,
130 VTYPE p16_17
= svmla_x (pg
, DUP (poly
[16]), x
, poly
[17]);
131 return svmla_x (pg
, VWRAP (estrin_15
) (pg
, x
, x2
, x4
, x8
, poly
), p16_17
,
134 static inline VTYPE
VWRAP (estrin_18
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
135 VTYPE x4
, VTYPE x8
, VTYPE x16
,
138 VTYPE p16_17
= svmla_x (pg
, DUP (poly
[16]), x
, poly
[17]);
139 VTYPE p16_18
= svmla_x (pg
, p16_17
, x2
, poly
[18]);
140 return svmla_x (pg
, VWRAP (estrin_15
) (pg
, x
, x2
, x4
, x8
, poly
), p16_18
,
143 static inline VTYPE
VWRAP (estrin_19
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
144 VTYPE x4
, VTYPE x8
, VTYPE x16
,
147 return svmla_x (pg
, VWRAP (estrin_15
) (pg
, x
, x2
, x4
, x8
, poly
),
148 VWRAP (pairwise_poly_3
) (pg
, x
, x2
, poly
+ 16), x16
);
151 static inline VTYPE
VWRAP (horner_3
) (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
153 VTYPE p
= svmla_x (pg
, DUP (poly
[2]), x
, poly
[3]);
154 p
= svmad_x (pg
, x
, p
, poly
[1]);
155 p
= svmad_x (pg
, x
, p
, poly
[0]);
158 static inline VTYPE
VWRAP (horner_4
) (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
160 VTYPE p
= svmla_x (pg
, DUP (poly
[3]), x
, poly
[4]);
161 p
= svmad_x (pg
, x
, p
, poly
[2]);
162 p
= svmad_x (pg
, x
, p
, poly
[1]);
163 p
= svmad_x (pg
, x
, p
, poly
[0]);
166 static inline VTYPE
VWRAP (horner_5
) (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
168 return svmad_x (pg
, x
, VWRAP (horner_4
) (pg
, x
, poly
+ 1), poly
[0]);
170 static inline VTYPE
VWRAP (horner_6
) (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
172 return svmad_x (pg
, x
, VWRAP (horner_5
) (pg
, x
, poly
+ 1), poly
[0]);
174 static inline VTYPE
VWRAP (horner_7
) (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
176 return svmad_x (pg
, x
, VWRAP (horner_6
) (pg
, x
, poly
+ 1), poly
[0]);
178 static inline VTYPE
VWRAP (horner_8
) (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
180 return svmad_x (pg
, x
, VWRAP (horner_7
) (pg
, x
, poly
+ 1), poly
[0]);
182 static inline VTYPE
VWRAP (horner_9
) (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
184 return svmad_x (pg
, x
, VWRAP (horner_8
) (pg
, x
, poly
+ 1), poly
[0]);
187 sv_horner_10_f32_x (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
189 return svmad_x (pg
, x
, VWRAP (horner_9
) (pg
, x
, poly
+ 1), poly
[0]);
192 sv_horner_11_f32_x (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
194 return svmad_x (pg
, x
, sv_horner_10_f32_x (pg
, x
, poly
+ 1), poly
[0]);
197 sv_horner_12_f32_x (svbool_t pg
, VTYPE x
, const STYPE
*poly
)
199 return svmad_x (pg
, x
, sv_horner_11_f32_x (pg
, x
, poly
+ 1), poly
[0]);
202 static inline VTYPE
VWRAP (pw_horner_4
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
205 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
206 VTYPE p23
= svmla_x (pg
, DUP (poly
[2]), x
, poly
[3]);
208 p
= svmla_x (pg
, p23
, x2
, poly
[4]);
209 p
= svmla_x (pg
, p01
, x2
, p
);
212 static inline VTYPE
VWRAP (pw_horner_5
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
215 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
216 VTYPE p23
= svmla_x (pg
, DUP (poly
[2]), x
, poly
[3]);
217 VTYPE p45
= svmla_x (pg
, DUP (poly
[4]), x
, poly
[5]);
219 p
= svmla_x (pg
, p23
, x2
, p45
);
220 p
= svmla_x (pg
, p01
, x2
, p
);
223 static inline VTYPE
VWRAP (pw_horner_6
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
226 VTYPE p26
= VWRAP (pw_horner_4
) (pg
, x
, x2
, poly
+ 2);
227 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
228 return svmla_x (pg
, p01
, x2
, p26
);
230 static inline VTYPE
VWRAP (pw_horner_7
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
233 VTYPE p27
= VWRAP (pw_horner_5
) (pg
, x
, x2
, poly
+ 2);
234 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
235 return svmla_x (pg
, p01
, x2
, p27
);
237 static inline VTYPE
VWRAP (pw_horner_8
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
240 VTYPE p28
= VWRAP (pw_horner_6
) (pg
, x
, x2
, poly
+ 2);
241 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
242 return svmla_x (pg
, p01
, x2
, p28
);
244 static inline VTYPE
VWRAP (pw_horner_9
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
247 VTYPE p29
= VWRAP (pw_horner_7
) (pg
, x
, x2
, poly
+ 2);
248 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
249 return svmla_x (pg
, p01
, x2
, p29
);
251 static inline VTYPE
VWRAP (pw_horner_10
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
254 VTYPE p2_10
= VWRAP (pw_horner_8
) (pg
, x
, x2
, poly
+ 2);
255 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
256 return svmla_x (pg
, p01
, x2
, p2_10
);
258 static inline VTYPE
VWRAP (pw_horner_11
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
261 VTYPE p2_11
= VWRAP (pw_horner_9
) (pg
, x
, x2
, poly
+ 2);
262 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
263 return svmla_x (pg
, p01
, x2
, p2_11
);
265 static inline VTYPE
VWRAP (pw_horner_12
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
268 VTYPE p2_12
= VWRAP (pw_horner_10
) (pg
, x
, x2
, poly
+ 2);
269 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
270 return svmla_x (pg
, p01
, x2
, p2_12
);
272 static inline VTYPE
VWRAP (pw_horner_13
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
275 VTYPE p2_13
= VWRAP (pw_horner_11
) (pg
, x
, x2
, poly
+ 2);
276 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
277 return svmla_x (pg
, p01
, x2
, p2_13
);
279 static inline VTYPE
VWRAP (pw_horner_14
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
282 VTYPE p2_14
= VWRAP (pw_horner_12
) (pg
, x
, x2
, poly
+ 2);
283 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
284 return svmla_x (pg
, p01
, x2
, p2_14
);
286 static inline VTYPE
VWRAP (pw_horner_15
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
289 VTYPE p2_15
= VWRAP (pw_horner_13
) (pg
, x
, x2
, poly
+ 2);
290 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
291 return svmla_x (pg
, p01
, x2
, p2_15
);
293 static inline VTYPE
VWRAP (pw_horner_16
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
296 VTYPE p2_16
= VWRAP (pw_horner_14
) (pg
, x
, x2
, poly
+ 2);
297 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
298 return svmla_x (pg
, p01
, x2
, p2_16
);
300 static inline VTYPE
VWRAP (pw_horner_17
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
303 VTYPE p2_17
= VWRAP (pw_horner_15
) (pg
, x
, x2
, poly
+ 2);
304 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
305 return svmla_x (pg
, p01
, x2
, p2_17
);
307 static inline VTYPE
VWRAP (pw_horner_18
) (svbool_t pg
, VTYPE x
, VTYPE x2
,
310 VTYPE p2_18
= VWRAP (pw_horner_16
) (pg
, x
, x2
, poly
+ 2);
311 VTYPE p01
= svmla_x (pg
, DUP (poly
[0]), x
, poly
[1]);
312 return svmla_x (pg
, p01
, x2
, p2_18
);